diff --git "a/profile_trace/iteration_22528/rank4_trace.json" "b/profile_trace/iteration_22528/rank4_trace.json" new file mode 100644--- /dev/null +++ "b/profile_trace/iteration_22528/rank4_trace.json" @@ -0,0 +1,157189 @@ + +{ + "schemaVersion": 1, + "deviceProperties": [ + { + "id": 0, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 1, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 2, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 3, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 4, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 5, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 6, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 7, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + } + ], + "cupti_version": 22, + "cuda_runtime_version": 12040, + "cuda_driver_version": 12080, + "distributedInfo": {"backend": "nccl", "rank": 4, "world_size": 8, "pg_count": 1, "pg_config": [{"pg_name": "0", "pg_desc": "default_pg", "backend_config": "cuda:nccl", "pg_size": 8, "ranks": [0, 1, 2, 3, 4, 5, 6, 7]}], "nccl_version": "2.21.5"}, + "record_shapes": 1, + "trace_id": "36EE842BF0CD41DEA711FBBEC3A42673", + "traceEvents": [ + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: DivBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937202823.022, "dur": 138.094, + "args": { + "External id": 977409,"Record function id": 0, "Sequence number": 10552468, "Fwd thread id": 1, "Ev Idx": 0 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "DivBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937202846.556, "dur": 104.153, + "args": { + "External id": 977410,"Sequence number": 10552468, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 1 + } + }, + { + "ph": "f", "id": 1, "pid": 2338710, "tid": 2379450, "ts": 6345937202846.556, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2338710, "tid": 2379450, + "ts": 6345937202856.255, "dur": 90.990, + "args": { + "External id": 977411,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 2 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937202976.779, "dur": 340.724, + "args": { + "External id": 977412,"Record function id": 0, "Ev Idx": 3 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345937203097.930, "dur": 115.959, + "args": { + "External id": 977413,"Record function id": 0, "Ev Idx": 4 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.25", "pid": 2338710, "tid": 2379450, + "ts": 6345937203141.632, "dur": 56.359, + "args": { + "External id": 977414,"Record function id": 0, "Ev Idx": 5 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937203219.905, "dur": 3.640, + "args": { + "External id": 977415,"Sequence number": 10552467, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 6 + } + }, + { + "ph": "f", "id": 2, "pid": 2338710, "tid": 2379450, "ts": 6345937203219.905, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345937203228.645, "dur": 82.282, + "args": { + "External id": 977416,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 7 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345937203248.492, "dur": 61.726, + "args": { + "External id": 977417,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 8 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937203262.412, "dur": 5.359, + "args": { + "External id": 977418,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937203332.560, "dur": 38154.969, + "args": { + "External id": 977419,"Record function id": 0, "Sequence number": 10552465, "Fwd thread id": 1, "Ev Idx": 10 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937203336.545, "dur": 38133.286, + "args": { + "External id": 977420,"Sequence number": 10552465, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 11 + } + }, + { + "ph": "f", "id": 3, "pid": 2338710, "tid": 2379450, "ts": 6345937203336.545, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937203390.414, "dur": 5.396, + "args": { + "External id": 977421,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345937203403.688, "dur": 37724.308, + "args": { + "External id": 977422,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345937203407.545, "dur": 37720.058, + "args": { + "External id": 977423,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 14 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937203413.190, "dur": 8.500, + "args": { + "External id": 977424,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937203424.945, "dur": 37700.948, + "args": { + "External id": 977425,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 16 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 2338710, "tid": 2379450, + "ts": 6345937241134.611, "dur": 0.743, + "args": { + "External id": 977426,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 17 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 2338710, "tid": 2379450, + "ts": 6345937241139.233, "dur": 4.537, + "args": { + "External id": 977427,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 18 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 2338710, "tid": 2379450, + "ts": 6345937241141.215, "dur": 2.270, + "args": { + "External id": 977428,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 19 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338710, "tid": 2379450, + "ts": 6345937241152.201, "dur": 37.201, + "args": { + "External id": 977429,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 20 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338710, "tid": 2379450, + "ts": 6345937241199.627, "dur": 54.493, + "args": { + "External id": 977430,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338710, "tid": 2379450, + "ts": 6345937241202.005, "dur": 51.822, + "args": { + "External id": 977431,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 22 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338710, "tid": 2379450, + "ts": 6345937241203.970, "dur": 49.345, + "args": { + "External id": 977432,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 23 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937241507.268, "dur": 27.013, + "args": { + "External id": 977433,"Record function id": 0, "Sequence number": 10552464, "Fwd thread id": 1, "Ev Idx": 24 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937241511.043, "dur": 19.355, + "args": { + "External id": 977434,"Sequence number": 10552464, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 25 + } + }, + { + "ph": "f", "id": 4, "pid": 2338710, "tid": 2379450, "ts": 6345937241511.043, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937241516.698, "dur": 13.364, + "args": { + "External id": 977435,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 26 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937241521.845, "dur": 7.947, + "args": { + "External id": 977436,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 27 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937241542.231, "dur": 133.234, + "args": { + "External id": 977437,"Record function id": 0, "Sequence number": 10552463, "Fwd thread id": 1, "Ev Idx": 28 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937241543.242, "dur": 123.478, + "args": { + "External id": 977438,"Sequence number": 10552463, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 29 + } + }, + { + "ph": "f", "id": 5, "pid": 2338710, "tid": 2379450, "ts": 6345937241543.242, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345937241548.470, "dur": 117.605, + "args": { + "External id": 977439,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 30 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338710, "tid": 2379450, + "ts": 6345937241555.228, "dur": 47.835, + "args": { + "External id": 977440,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 31 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937241562.310, "dur": 7.595, + "args": { + "External id": 977441,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 32 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2379450, + "ts": 6345937241573.080, "dur": 29.561, + "args": { + "External id": 977442,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 33 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2379450, + "ts": 6345937241578.787, "dur": 23.385, + "args": { + "External id": 977443,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 34 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345937241606.288, "dur": 10.344, + "args": { + "External id": 977444,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 35 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937241610.900, "dur": 5.290, + "args": { + "External id": 977445,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[16777216, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 36 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937241618.283, "dur": 46.611, + "args": { + "External id": 977446,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 37 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937241681.892, "dur": 80.927, + "args": { + "External id": 977447,"Record function id": 0, "Sequence number": 10552462, "Fwd thread id": 1, "Ev Idx": 38 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937241683.098, "dur": 75.896, + "args": { + "External id": 977448,"Sequence number": 10552462, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 39 + } + }, + { + "ph": "f", "id": 6, "pid": 2338710, "tid": 2379450, "ts": 6345937241683.098, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345937241687.814, "dur": 70.707, + "args": { + "External id": 977449,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "2", "3"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 40 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338710, "tid": 2379450, + "ts": 6345937241694.532, "dur": 25.056, + "args": { + "External id": 977450,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 41 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937241696.144, "dur": 4.273, + "args": { + "External id": 977451,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 42 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2379450, + "ts": 6345937241701.503, "dur": 17.715, + "args": { + "External id": 977452,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 43 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2379450, + "ts": 6345937241705.452, "dur": 13.204, + "args": { + "External id": 977453,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 44 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338710, "tid": 2379450, + "ts": 6345937241722.601, "dur": 11.392, + "args": { + "External id": 977454,"Record function id": 0, "Concrete Inputs": ["", "2", "3"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 45 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937241730.762, "dur": 1.362, + "args": { + "External id": 977455,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "12288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 46 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937241735.044, "dur": 22.920, + "args": { + "External id": 977456,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 47 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937241767.235, "dur": 227.772, + "args": { + "External id": 977457,"Record function id": 0, "Sequence number": 10552461, "Fwd thread id": 1, "Ev Idx": 48 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937241768.326, "dur": 221.514, + "args": { + "External id": 977458,"Sequence number": 10552461, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 49 + } + }, + { + "ph": "f", "id": 7, "pid": 2338710, "tid": 2379450, "ts": 6345937241768.326, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345937241772.296, "dur": 216.882, + "args": { + "External id": 977459,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 50 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338710, "tid": 2379450, + "ts": 6345937241776.930, "dur": 19.126, + "args": { + "External id": 977460,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 51 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937241777.879, "dur": 2.815, + "args": { + "External id": 977461,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 52 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2379450, + "ts": 6345937241781.421, "dur": 14.340, + "args": { + "External id": 977462,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 53 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2379450, + "ts": 6345937241782.260, "dur": 13.097, + "args": { + "External id": 977463,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 54 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345937241797.294, "dur": 2.643, + "args": { + "External id": 977464,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 55 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937241798.710, "dur": 0.972, + "args": { + "External id": 977465,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 56 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937241804.438, "dur": 183.460, + "args": { + "External id": 977466,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 57 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937242001.689, "dur": 166.441, + "args": { + "External id": 977467,"Record function id": 0, "Sequence number": 10552460, "Fwd thread id": 1, "Ev Idx": 58 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937242003.001, "dur": 158.344, + "args": { + "External id": 977468,"Sequence number": 10552460, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 59 + } + }, + { + "ph": "f", "id": 8, "pid": 2338710, "tid": 2379450, "ts": 6345937242003.001, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345937242005.001, "dur": 155.773, + "args": { + "External id": 977469,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 60 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338710, "tid": 2379450, + "ts": 6345937242026.071, "dur": 59.340, + "args": { + "External id": 977470,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 61 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937242028.014, "dur": 5.072, + "args": { + "External id": 977471,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 62 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2379450, + "ts": 6345937242034.310, "dur": 17.554, + "args": { + "External id": 977472,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 63 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2379450, + "ts": 6345937242035.506, "dur": 15.899, + "args": { + "External id": 977473,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 64 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345937242089.040, "dur": 5.677, + "args": { + "External id": 977474,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 65 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937242093.574, "dur": 0.879, + "args": { + "External id": 977475,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 66 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937242095.597, "dur": 63.923, + "args": { + "External id": 977476,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 67 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937242179.765, "dur": 52.643, + "args": { + "External id": 977477,"Record function id": 0, "Sequence number": 10552459, "Fwd thread id": 1, "Ev Idx": 68 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937242181.598, "dur": 1.478, + "args": { + "External id": 977478,"Sequence number": 10552459, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 69 + } + }, + { + "ph": "f", "id": 9, "pid": 2338710, "tid": 2379450, "ts": 6345937242181.598, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345937242187.214, "dur": 41.514, + "args": { + "External id": 977479,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 70 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345937242189.938, "dur": 38.072, + "args": { + "External id": 977480,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 71 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937242202.106, "dur": 2.779, + "args": { + "External id": 977481,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 72 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937242238.450, "dur": 2362.862, + "args": { + "External id": 977482,"Record function id": 0, "Sequence number": 10552457, "Fwd thread id": 1, "Ev Idx": 73 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937242240.314, "dur": 2321.650, + "args": { + "External id": 977483,"Sequence number": 10552457, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 74 + } + }, + { + "ph": "f", "id": 10, "pid": 2338710, "tid": 2379450, "ts": 6345937242240.314, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937242285.549, "dur": 4.630, + "args": { + "External id": 977484,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 75 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345937242293.228, "dur": 2017.515, + "args": { + "External id": 977485,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 76 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345937242295.160, "dur": 2015.245, + "args": { + "External id": 977486,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 77 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937242298.452, "dur": 6.593, + "args": { + "External id": 977487,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 78 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937242306.228, "dur": 2003.092, + "args": { + "External id": 977488,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 79 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 2338710, "tid": 2379450, + "ts": 6345937244314.885, "dur": 0.433, + "args": { + "External id": 977489,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 80 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 2338710, "tid": 2379450, + "ts": 6345937244317.471, "dur": 6.309, + "args": { + "External id": 977490,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 81 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 2338710, "tid": 2379450, + "ts": 6345937244322.365, "dur": 1.229, + "args": { + "External id": 977491,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 82 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338710, "tid": 2379450, + "ts": 6345937244329.081, "dur": 26.139, + "args": { + "External id": 977492,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 83 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338710, "tid": 2379450, + "ts": 6345937244361.973, "dur": 45.104, + "args": { + "External id": 977493,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 84 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338710, "tid": 2379450, + "ts": 6345937244363.439, "dur": 43.439, + "args": { + "External id": 977494,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 85 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338710, "tid": 2379450, + "ts": 6345937244365.014, "dur": 41.521, + "args": { + "External id": 977495,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 86 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345937244575.635, "dur": 21.431, + "args": { + "External id": 977496,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 87 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937244614.015, "dur": 18.876, + "args": { + "External id": 977497,"Record function id": 0, "Sequence number": 10552456, "Fwd thread id": 1, "Ev Idx": 88 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937244615.549, "dur": 14.159, + "args": { + "External id": 977498,"Sequence number": 10552456, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 89 + } + }, + { + "ph": "f", "id": 11, "pid": 2338710, "tid": 2379450, "ts": 6345937244615.549, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937244622.192, "dur": 7.272, + "args": { + "External id": 977499,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 90 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937244624.031, "dur": 5.207, + "args": { + "External id": 977500,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 91 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937244637.180, "dur": 86.489, + "args": { + "External id": 977501,"Record function id": 0, "Sequence number": 10552455, "Fwd thread id": 1, "Ev Idx": 92 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937244638.186, "dur": 79.694, + "args": { + "External id": 977502,"Sequence number": 10552455, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 93 + } + }, + { + "ph": "f", "id": 12, "pid": 2338710, "tid": 2379450, "ts": 6345937244638.186, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345937244640.458, "dur": 76.872, + "args": { + "External id": 977503,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 94 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338710, "tid": 2379450, + "ts": 6345937244649.339, "dur": 23.635, + "args": { + "External id": 977504,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 95 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937244651.255, "dur": 3.983, + "args": { + "External id": 977505,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 96 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2379450, + "ts": 6345937244656.297, "dur": 16.357, + "args": { + "External id": 977506,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 97 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2379450, + "ts": 6345937244657.930, "dur": 14.204, + "args": { + "External id": 977507,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 98 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345937244674.817, "dur": 4.577, + "args": { + "External id": 977508,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 99 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937244677.548, "dur": 1.332, + "args": { + "External id": 977509,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[16777216, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937244680.607, "dur": 35.821, + "args": { + "External id": 977510,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937244728.493, "dur": 65.953, + "args": { + "External id": 977511,"Record function id": 0, "Sequence number": 10552454, "Fwd thread id": 1, "Ev Idx": 102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937244729.615, "dur": 61.694, + "args": { + "External id": 977512,"Sequence number": 10552454, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 103 + } + }, + { + "ph": "f", "id": 13, "pid": 2338710, "tid": 2379450, "ts": 6345937244729.615, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345937244735.437, "dur": 55.427, + "args": { + "External id": 977513,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "2", "2"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338710, "tid": 2379450, + "ts": 6345937244737.850, "dur": 23.796, + "args": { + "External id": 977514,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937244738.973, "dur": 3.193, + "args": { + "External id": 977515,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2379450, + "ts": 6345937244745.515, "dur": 15.801, + "args": { + "External id": 977516,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2379450, + "ts": 6345937244746.540, "dur": 14.287, + "args": { + "External id": 977517,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338710, "tid": 2379450, + "ts": 6345937244763.161, "dur": 8.379, + "args": { + "External id": 977518,"Record function id": 0, "Concrete Inputs": ["", "2", "2"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937244769.723, "dur": 1.223, + "args": { + "External id": 977519,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "8192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937244772.427, "dur": 17.813, + "args": { + "External id": 977520,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937244801.670, "dur": 141.284, + "args": { + "External id": 977521,"Record function id": 0, "Sequence number": 10552453, "Fwd thread id": 1, "Ev Idx": 112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937244803.003, "dur": 135.802, + "args": { + "External id": 977522,"Sequence number": 10552453, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 113 + } + }, + { + "ph": "f", "id": 14, "pid": 2338710, "tid": 2379450, "ts": 6345937244803.003, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345937244804.950, "dur": 133.423, + "args": { + "External id": 977523,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338710, "tid": 2379450, + "ts": 6345937244806.531, "dur": 20.444, + "args": { + "External id": 977524,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937244807.720, "dur": 2.155, + "args": { + "External id": 977525,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2379450, + "ts": 6345937244813.339, "dur": 13.316, + "args": { + "External id": 977526,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2379450, + "ts": 6345937244814.526, "dur": 11.707, + "args": { + "External id": 977527,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345937244827.966, "dur": 5.120, + "args": { + "External id": 977528,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937244832.034, "dur": 0.804, + "args": { + "External id": 977529,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937244833.826, "dur": 103.456, + "args": { + "External id": 977530,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937244948.055, "dur": 190.080, + "args": { + "External id": 977531,"Record function id": 0, "Sequence number": 10552452, "Fwd thread id": 1, "Ev Idx": 122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937244949.356, "dur": 152.320, + "args": { + "External id": 977532,"Sequence number": 10552452, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 123 + } + }, + { + "ph": "f", "id": 15, "pid": 2338710, "tid": 2379450, "ts": 6345937244949.356, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345937244952.995, "dur": 148.182, + "args": { + "External id": 977533,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338710, "tid": 2379450, + "ts": 6345937244953.897, "dur": 20.803, + "args": { + "External id": 977534,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937244954.753, "dur": 2.379, + "args": { + "External id": 977535,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2379450, + "ts": 6345937244957.855, "dur": 16.550, + "args": { + "External id": 977536,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2379450, + "ts": 6345937244962.360, "dur": 11.480, + "args": { + "External id": 977537,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345937244975.940, "dur": 4.790, + "args": { + "External id": 977538,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937244977.103, "dur": 3.431, + "args": { + "External id": 977539,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937244981.481, "dur": 118.082, + "args": { + "External id": 977540,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345937245109.008, "dur": 27.136, + "args": { + "External id": 977541,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937245145.933, "dur": 46.581, + "args": { + "External id": 977542,"Record function id": 0, "Sequence number": 10552451, "Fwd thread id": 1, "Ev Idx": 133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937245147.254, "dur": 1.216, + "args": { + "External id": 977543,"Sequence number": 10552451, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 134 + } + }, + { + "ph": "f", "id": 16, "pid": 2338710, "tid": 2379450, "ts": 6345937245147.254, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345937245151.440, "dur": 37.674, + "args": { + "External id": 977544,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345937245154.101, "dur": 34.439, + "args": { + "External id": 977545,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937245163.621, "dur": 0.866, + "args": { + "External id": 977546,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937245197.854, "dur": 3442.169, + "args": { + "External id": 977547,"Record function id": 0, "Sequence number": 10552449, "Fwd thread id": 1, "Ev Idx": 138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937245201.901, "dur": 3398.825, + "args": { + "External id": 977548,"Sequence number": 10552449, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 139 + } + }, + { + "ph": "f", "id": 17, "pid": 2338710, "tid": 2379450, "ts": 6345937245201.901, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937245241.012, "dur": 3.787, + "args": { + "External id": 977549,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345937245247.550, "dur": 3100.860, + "args": { + "External id": 977550,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345937245249.336, "dur": 3098.692, + "args": { + "External id": 977551,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937245252.241, "dur": 4.948, + "args": { + "External id": 977552,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937245261.183, "dur": 3085.290, + "args": { + "External id": 977553,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 2338710, "tid": 2379450, + "ts": 6345937248351.938, "dur": 0.379, + "args": { + "External id": 977554,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 2338710, "tid": 2379450, + "ts": 6345937248353.843, "dur": 3.413, + "args": { + "External id": 977555,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 2338710, "tid": 2379450, + "ts": 6345937248355.860, "dur": 1.261, + "args": { + "External id": 977556,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338710, "tid": 2379450, + "ts": 6345937248363.149, "dur": 25.612, + "args": { + "External id": 977557,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338710, "tid": 2379450, + "ts": 6345937248398.397, "dur": 46.916, + "args": { + "External id": 977558,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338710, "tid": 2379450, + "ts": 6345937248400.133, "dur": 44.984, + "args": { + "External id": 977559,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338710, "tid": 2379450, + "ts": 6345937248402.431, "dur": 42.431, + "args": { + "External id": 977560,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345937248613.096, "dur": 22.428, + "args": { + "External id": 977561,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937248652.396, "dur": 16.251, + "args": { + "External id": 977562,"Record function id": 0, "Sequence number": 10552448, "Fwd thread id": 1, "Ev Idx": 153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937248656.951, "dur": 9.617, + "args": { + "External id": 977563,"Sequence number": 10552448, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 154 + } + }, + { + "ph": "f", "id": 18, "pid": 2338710, "tid": 2379450, "ts": 6345937248656.951, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937248660.047, "dur": 6.211, + "args": { + "External id": 977564,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937248661.363, "dur": 4.642, + "args": { + "External id": 977565,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937248672.726, "dur": 85.932, + "args": { + "External id": 977566,"Record function id": 0, "Sequence number": 10552447, "Fwd thread id": 1, "Ev Idx": 157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937248673.676, "dur": 79.830, + "args": { + "External id": 977567,"Sequence number": 10552447, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 158 + } + }, + { + "ph": "f", "id": 19, "pid": 2338710, "tid": 2379450, "ts": 6345937248673.676, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345937248678.177, "dur": 74.875, + "args": { + "External id": 977568,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338710, "tid": 2379450, + "ts": 6345937248681.632, "dur": 26.587, + "args": { + "External id": 977569,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937248683.482, "dur": 3.756, + "args": { + "External id": 977570,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2379450, + "ts": 6345937248688.288, "dur": 19.621, + "args": { + "External id": 977571,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2379450, + "ts": 6345937248690.434, "dur": 17.028, + "args": { + "External id": 977572,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345937248710.504, "dur": 6.003, + "args": { + "External id": 977573,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937248714.598, "dur": 1.339, + "args": { + "External id": 977574,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[16777216, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937248719.630, "dur": 32.302, + "args": { + "External id": 977575,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937248763.938, "dur": 67.545, + "args": { + "External id": 977576,"Record function id": 0, "Sequence number": 10552446, "Fwd thread id": 1, "Ev Idx": 167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937248765.709, "dur": 62.488, + "args": { + "External id": 977577,"Sequence number": 10552446, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 168 + } + }, + { + "ph": "f", "id": 20, "pid": 2338710, "tid": 2379450, "ts": 6345937248765.709, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345937248768.126, "dur": 59.616, + "args": { + "External id": 977578,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "2", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338710, "tid": 2379450, + "ts": 6345937248770.285, "dur": 23.730, + "args": { + "External id": 977579,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937248771.237, "dur": 5.948, + "args": { + "External id": 977580,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2379450, + "ts": 6345937248777.998, "dur": 15.726, + "args": { + "External id": 977581,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2379450, + "ts": 6345937248778.994, "dur": 14.289, + "args": { + "External id": 977582,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338710, "tid": 2379450, + "ts": 6345937248795.470, "dur": 12.053, + "args": { + "External id": 977583,"Record function id": 0, "Concrete Inputs": ["", "2", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937248804.593, "dur": 1.159, + "args": { + "External id": 977584,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "4096"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937248808.610, "dur": 18.548, + "args": { + "External id": 977585,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937248835.744, "dur": 137.358, + "args": { + "External id": 977586,"Record function id": 0, "Sequence number": 10552445, "Fwd thread id": 1, "Ev Idx": 177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937248837.117, "dur": 131.271, + "args": { + "External id": 977587,"Sequence number": 10552445, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 178 + } + }, + { + "ph": "f", "id": 21, "pid": 2338710, "tid": 2379450, "ts": 6345937248837.117, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345937248839.175, "dur": 128.748, + "args": { + "External id": 977588,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338710, "tid": 2379450, + "ts": 6345937248840.567, "dur": 20.921, + "args": { + "External id": 977589,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937248841.472, "dur": 2.505, + "args": { + "External id": 977590,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2379450, + "ts": 6345937248844.641, "dur": 16.516, + "args": { + "External id": 977591,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2379450, + "ts": 6345937248845.510, "dur": 15.259, + "args": { + "External id": 977592,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345937248862.674, "dur": 4.669, + "args": { + "External id": 977593,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937248866.229, "dur": 0.797, + "args": { + "External id": 977594,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937248870.745, "dur": 95.950, + "args": { + "External id": 977595,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937248978.484, "dur": 186.946, + "args": { + "External id": 977596,"Record function id": 0, "Sequence number": 10552444, "Fwd thread id": 1, "Ev Idx": 187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937248979.539, "dur": 157.762, + "args": { + "External id": 977597,"Sequence number": 10552444, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 188 + } + }, + { + "ph": "f", "id": 22, "pid": 2338710, "tid": 2379450, "ts": 6345937248979.539, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345937248983.820, "dur": 153.082, + "args": { + "External id": 977598,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338710, "tid": 2379450, + "ts": 6345937248985.044, "dur": 20.648, + "args": { + "External id": 977599,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937248985.960, "dur": 2.619, + "args": { + "External id": 977600,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2379450, + "ts": 6345937248989.314, "dur": 16.049, + "args": { + "External id": 977601,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2379450, + "ts": 6345937248990.429, "dur": 14.512, + "args": { + "External id": 977602,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345937249006.802, "dur": 23.117, + "args": { + "External id": 977603,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937249026.961, "dur": 1.149, + "args": { + "External id": 977604,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937249033.337, "dur": 101.659, + "args": { + "External id": 977605,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345937249144.993, "dur": 18.797, + "args": { + "External id": 977606,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937249176.101, "dur": 42.626, + "args": { + "External id": 977607,"Record function id": 0, "Sequence number": 10552443, "Fwd thread id": 1, "Ev Idx": 198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937249177.646, "dur": 1.527, + "args": { + "External id": 977608,"Sequence number": 10552443, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 199 + } + }, + { + "ph": "f", "id": 23, "pid": 2338710, "tid": 2379450, "ts": 6345937249177.646, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345937249181.276, "dur": 32.053, + "args": { + "External id": 977609,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345937249183.944, "dur": 28.897, + "args": { + "External id": 977610,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937249190.120, "dur": 0.971, + "args": { + "External id": 977611,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937249223.390, "dur": 3441.393, + "args": { + "External id": 977612,"Record function id": 0, "Sequence number": 10552442, "Fwd thread id": 1, "Ev Idx": 203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937249238.540, "dur": 3387.418, + "args": { + "External id": 977613,"Sequence number": 10552442, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 204 + } + }, + { + "ph": "f", "id": 24, "pid": 2338710, "tid": 2379450, "ts": 6345937249238.540, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937249272.966, "dur": 2.981, + "args": { + "External id": 977614,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345937249279.093, "dur": 3102.962, + "args": { + "External id": 977615,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345937249280.998, "dur": 3100.786, + "args": { + "External id": 977616,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937249286.099, "dur": 6.820, + "args": { + "External id": 977617,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937249293.897, "dur": 3086.892, + "args": { + "External id": 977618,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 2338710, "tid": 2379450, + "ts": 6345937252385.916, "dur": 0.429, + "args": { + "External id": 977619,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 2338710, "tid": 2379450, + "ts": 6345937252391.259, "dur": 2.820, + "args": { + "External id": 977620,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 2338710, "tid": 2379450, + "ts": 6345937252392.650, "dur": 1.237, + "args": { + "External id": 977621,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338710, "tid": 2379450, + "ts": 6345937252399.091, "dur": 26.447, + "args": { + "External id": 977622,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338710, "tid": 2379450, + "ts": 6345937252431.376, "dur": 49.091, + "args": { + "External id": 977623,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338710, "tid": 2379450, + "ts": 6345937252432.834, "dur": 47.404, + "args": { + "External id": 977624,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338710, "tid": 2379450, + "ts": 6345937252434.357, "dur": 45.303, + "args": { + "External id": 977625,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345937252640.210, "dur": 19.040, + "args": { + "External id": 977626,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937252681.990, "dur": 15.522, + "args": { + "External id": 977627,"Record function id": 0, "Ev Idx": 218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937252684.953, "dur": 10.624, + "args": { + "External id": 977628,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937252688.731, "dur": 5.605, + "args": { + "External id": 977629,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937252689.881, "dur": 4.309, + "args": { + "External id": 977630,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937252702.344, "dur": 17.459, + "args": { + "External id": 977631,"Record function id": 0, "Sequence number": 10552441, "Fwd thread id": 1, "Ev Idx": 222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937252706.248, "dur": 11.314, + "args": { + "External id": 977632,"Sequence number": 10552441, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 223 + } + }, + { + "ph": "f", "id": 25, "pid": 2338710, "tid": 2379450, "ts": 6345937252706.248, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937252710.150, "dur": 7.151, + "args": { + "External id": 977633,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937252713.350, "dur": 3.774, + "args": { + "External id": 977634,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937252723.589, "dur": 84.947, + "args": { + "External id": 977635,"Record function id": 0, "Sequence number": 10552440, "Fwd thread id": 1, "Ev Idx": 226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937252724.372, "dur": 79.373, + "args": { + "External id": 977636,"Sequence number": 10552440, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 227 + } + }, + { + "ph": "f", "id": 26, "pid": 2338710, "tid": 2379450, "ts": 6345937252724.372, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345937252726.941, "dur": 76.340, + "args": { + "External id": 977637,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338710, "tid": 2379450, + "ts": 6345937252729.800, "dur": 29.092, + "args": { + "External id": 977638,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937252731.566, "dur": 3.885, + "args": { + "External id": 977639,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2379450, + "ts": 6345937252737.080, "dur": 21.446, + "args": { + "External id": 977640,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2379450, + "ts": 6345937252739.697, "dur": 18.134, + "args": { + "External id": 977641,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345937252760.892, "dur": 7.114, + "args": { + "External id": 977642,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937252766.093, "dur": 1.476, + "args": { + "External id": 977643,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[16777216, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937252771.803, "dur": 30.555, + "args": { + "External id": 977644,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937252813.924, "dur": 66.917, + "args": { + "External id": 977645,"Record function id": 0, "Sequence number": 10552439, "Fwd thread id": 1, "Ev Idx": 236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937252814.879, "dur": 62.634, + "args": { + "External id": 977646,"Sequence number": 10552439, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 237 + } + }, + { + "ph": "f", "id": 27, "pid": 2338710, "tid": 2379450, "ts": 6345937252814.879, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345937252816.977, "dur": 60.174, + "args": { + "External id": 977647,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "2", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338710, "tid": 2379450, + "ts": 6345937252819.038, "dur": 26.253, + "args": { + "External id": 977648,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937252825.311, "dur": 3.447, + "args": { + "External id": 977649,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2379450, + "ts": 6345937252829.657, "dur": 15.334, + "args": { + "External id": 977650,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2379450, + "ts": 6345937252831.045, "dur": 13.430, + "args": { + "External id": 977651,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338710, "tid": 2379450, + "ts": 6345937252846.683, "dur": 9.521, + "args": { + "External id": 977652,"Record function id": 0, "Concrete Inputs": ["", "2", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937252854.463, "dur": 1.215, + "args": { + "External id": 977653,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937252857.221, "dur": 19.296, + "args": { + "External id": 977654,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937252885.759, "dur": 153.670, + "args": { + "External id": 977655,"Record function id": 0, "Sequence number": 10552438, "Fwd thread id": 1, "Ev Idx": 246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937252886.972, "dur": 146.355, + "args": { + "External id": 977656,"Sequence number": 10552438, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 247 + } + }, + { + "ph": "f", "id": 28, "pid": 2338710, "tid": 2379450, "ts": 6345937252886.972, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345937252889.036, "dur": 143.554, + "args": { + "External id": 977657,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338710, "tid": 2379450, + "ts": 6345937252893.108, "dur": 21.710, + "args": { + "External id": 977658,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937252894.344, "dur": 5.143, + "args": { + "External id": 977659,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2379450, + "ts": 6345937252900.346, "dur": 14.152, + "args": { + "External id": 977660,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2379450, + "ts": 6345937252901.203, "dur": 12.817, + "args": { + "External id": 977661,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345937252915.936, "dur": 2.362, + "args": { + "External id": 977662,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937252917.384, "dur": 0.665, + "args": { + "External id": 977663,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937252921.533, "dur": 109.442, + "args": { + "External id": 977664,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937253047.993, "dur": 189.378, + "args": { + "External id": 977665,"Record function id": 0, "Sequence number": 10552437, "Fwd thread id": 1, "Ev Idx": 256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937253049.378, "dur": 162.468, + "args": { + "External id": 977666,"Sequence number": 10552437, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 257 + } + }, + { + "ph": "f", "id": 29, "pid": 2338710, "tid": 2379450, "ts": 6345937253049.378, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345937253051.554, "dur": 159.876, + "args": { + "External id": 977667,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338710, "tid": 2379450, + "ts": 6345937253087.651, "dur": 38.451, + "args": { + "External id": 977668,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937253094.110, "dur": 4.590, + "args": { + "External id": 977669,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2379450, + "ts": 6345937253104.030, "dur": 21.758, + "args": { + "External id": 977670,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2379450, + "ts": 6345937253107.534, "dur": 17.722, + "args": { + "External id": 977671,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345937253129.783, "dur": 3.102, + "args": { + "External id": 977672,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937253131.514, "dur": 1.068, + "args": { + "External id": 977673,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937253133.903, "dur": 76.435, + "args": { + "External id": 977674,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345937253218.659, "dur": 16.027, + "args": { + "External id": 977675,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937253246.775, "dur": 409.398, + "args": { + "External id": 977676,"Record function id": 0, "Sequence number": 10552436, "Fwd thread id": 1, "Ev Idx": 267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937253248.531, "dur": 397.074, + "args": { + "External id": 977677,"Sequence number": 10552436, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 268 + } + }, + { + "ph": "f", "id": 30, "pid": 2338710, "tid": 2379450, "ts": 6345937253248.531, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345937253431.007, "dur": 55.716, + "args": { + "External id": 977678,"kernel_hash": "c2hiad6vohc5juoazzm5elv5p6zjifqe63zwhygqq3luayunx4no", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "131072", "4096", "1", "993", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2h/c2hiad6vohc5juoazzm5elv5p6zjifqe63zwhygqq3luayunx4no.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[131072, 4096], [4096], [131072, 4096], [131072, 4096], [132, 4096], [131072], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_0", "pid": 2338710, "tid": 2379450, + "ts": 6345937253531.675, "dur": 33.699, + "args": { + "External id": 977679,"kernel_hash": "c6346guyxknvaslrs2ei3ec2tw4wgztd2bkmy2hswhmzwewgk7bb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/63/c6346guyxknvaslrs2ei3ec2tw4wgztd2bkmy2hswhmzwewgk7bb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_1", "pid": 2338710, "tid": 2379450, + "ts": 6345937253588.428, "dur": 25.821, + "args": { + "External id": 977680,"kernel_hash": "cey5irmf4ovxj63ncyq4qfgp27xz4mzqhlhvmoqbijvnsrskn4ba", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ey/cey5irmf4ovxj63ncyq4qfgp27xz4mzqhlhvmoqbijvnsrskn4ba.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937253669.600, "dur": 17.707, + "args": { + "External id": 977681,"Record function id": 0, "Ev Idx": 272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937253673.529, "dur": 12.697, + "args": { + "External id": 977682,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937253677.042, "dur": 8.018, + "args": { + "External id": 977683,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937253681.534, "dur": 3.357, + "args": { + "External id": 977684,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: StackBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937253693.343, "dur": 46.127, + "args": { + "External id": 977685,"Record function id": 0, "Sequence number": 10552435, "Fwd thread id": 1, "Ev Idx": 276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "StackBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937253694.392, "dur": 36.770, + "args": { + "External id": 977686,"Sequence number": 10552435, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 277 + } + }, + { + "ph": "f", "id": 31, "pid": 2338710, "tid": 2379450, "ts": 6345937253694.392, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338710, "tid": 2379450, + "ts": 6345937253698.402, "dur": 11.732, + "args": { + "External id": 977687,"Record function id": 0, "Concrete Inputs": ["", "-2", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937253705.105, "dur": 1.612, + "args": { + "External id": 977688,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338710, "tid": 2379450, + "ts": 6345937253710.850, "dur": 9.273, + "args": { + "External id": 977689,"Record function id": 0, "Concrete Inputs": ["", "-2", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937253718.766, "dur": 0.601, + "args": { + "External id": 977690,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "4096"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338710, "tid": 2379450, + "ts": 6345937253721.169, "dur": 2.978, + "args": { + "External id": 977691,"Record function id": 0, "Concrete Inputs": ["", "-2", "2"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937253722.867, "dur": 0.613, + "args": { + "External id": 977692,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "8192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338710, "tid": 2379450, + "ts": 6345937253724.911, "dur": 5.449, + "args": { + "External id": 977693,"Record function id": 0, "Concrete Inputs": ["", "-2", "3"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937253729.050, "dur": 0.523, + "args": { + "External id": 977694,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "12288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937253745.207, "dur": 7.788, + "args": { + "External id": 977695,"Record function id": 0, "Sequence number": 10552434, "Fwd thread id": 1, "Ev Idx": 286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937253746.263, "dur": 2.425, + "args": { + "External id": 977696,"Sequence number": 10552434, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 287 + } + }, + { + "ph": "f", "id": 32, "pid": 2338710, "tid": 2379450, "ts": 6345937253746.263, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937253759.420, "dur": 730.146, + "args": { + "External id": 977697,"Record function id": 0, "Sequence number": 10552433, "Fwd thread id": 1, "Ev Idx": 288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937253760.903, "dur": 714.724, + "args": { + "External id": 977698,"Sequence number": 10552433, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 289 + } + }, + { + "ph": "f", "id": 33, "pid": 2338710, "tid": 2379450, "ts": 6345937253760.903, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937253812.427, "dur": 16.223, + "args": { + "External id": 977699,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338710, "tid": 2379450, + "ts": 6345937253822.896, "dur": 5.400, + "args": { + "External id": 977700,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]", "[16384, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[67108864, 16384, 1], [], []], "Input Dims": [[8, 4096, 4096], [], []], "Ev Idx": 291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937253835.095, "dur": 10.131, + "args": { + "External id": 977701,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937253837.773, "dur": 6.568, + "args": { + "External id": 977702,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937253840.687, "dur": 3.361, + "args": { + "External id": 977703,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2379450, + "ts": 6345937253853.704, "dur": 184.705, + "args": { + "External id": 977704,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16384, 1], [1, 14336], []], "Input Dims": [[32768, 4096], [14336, 4096], []], "Ev Idx": 295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937253854.955, "dur": 7.472, + "args": { + "External id": 977705,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 14336]], "Input Dims": [[14336, 4096]], "Ev Idx": 296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937253856.766, "dur": 4.767, + "args": { + "External id": 977706,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 14336], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937253860.725, "dur": 0.667, + "args": { + "External id": 977707,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[14336, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 14336], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2379450, + "ts": 6345937253865.085, "dur": 171.127, + "args": { + "External id": 977708,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937253867.017, "dur": 167.337, + "args": { + "External id": 977709,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2379450, + "ts": 6345937254048.297, "dur": 50.538, + "args": { + "External id": 977710,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [58720256, 14336, 1]], "Input Dims": [[32768, 14336], [8, 4096, 14336]], "Ev Idx": 301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937254092.965, "dur": 5.387, + "args": { + "External id": 977711,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937254148.172, "dur": 8.636, + "args": { + "External id": 977712,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937254159.412, "dur": 1.965, + "args": { + "External id": 977713,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937254165.895, "dur": 2.400, + "args": { + "External id": 977714,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937254218.304, "dur": 3.160, + "args": { + "External id": 977715,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937254219.358, "dur": 1.909, + "args": { + "External id": 977716,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 2338710, "tid": 2379450, + "ts": 6345937254253.395, "dur": 192.314, + "args": { + "External id": 977717,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[16384, 1], [14336, 1]], []], "Input Dims": [[], [[32768, 4096], [32768, 14336]], []], "Ev Idx": 308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2379450, + "ts": 6345937254261.325, "dur": 15.063, + "args": { + "External id": 977718,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937254268.960, "dur": 4.329, + "args": { + "External id": 977719,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096, 1]", "[16384, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338710, "tid": 2379450, + "ts": 6345937254278.815, "dur": 6.943, + "args": { + "External id": 977720,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16384, 1, 1], []], "Input Dims": [[32768, 4096, 1], []], "Ev Idx": 311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937254284.173, "dur": 0.765, + "args": { + "External id": 977721,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 32768]", "[1, 1, 16384]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1, 1], [], [], []], "Input Dims": [[32768, 4096, 1], [], [], []], "Ev Idx": 312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2379450, + "ts": 6345937254287.869, "dur": 4.351, + "args": { + "External id": 977722,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937254291.246, "dur": 0.579, + "args": { + "External id": 977723,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338710, "tid": 2379450, + "ts": 6345937254292.994, "dur": 5.017, + "args": { + "External id": 977724,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937254296.899, "dur": 0.613, + "args": { + "External id": 977725,"Record function id": 0, "Concrete Inputs": ["", "[1, 14336, 32768]", "[1, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1, 1], [], [], []], "Input Dims": [[32768, 14336, 1], [], [], []], "Ev Idx": 316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338710, "tid": 2379450, + "ts": 6345937254302.350, "dur": 5.119, + "args": { + "External id": 977726,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 16384], []], "Input Dims": [[4096, 1, 32768], []], "Ev Idx": 317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937254306.359, "dur": 0.780, + "args": { + "External id": 977727,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768, 1]", "[1, 16384, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 16384], [], [], []], "Input Dims": [[4096, 1, 32768], [], [], []], "Ev Idx": 318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937254310.479, "dur": 9.334, + "args": { + "External id": 977728,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 16384, 1], []], "Input Dims": [[4096, 32768, 1], []], "Ev Idx": 319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338710, "tid": 2379450, + "ts": 6345937254317.065, "dur": 2.523, + "args": { + "External id": 977729,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]", "[4096, 1, 16384]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 16384, 1], [], []], "Input Dims": [[4096, 32768, 1], [], []], "Ev Idx": 320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338710, "tid": 2379450, + "ts": 6345937254320.974, "dur": 5.118, + "args": { + "External id": 977730,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 14336], []], "Input Dims": [[1, 14336, 32768], []], "Ev Idx": 321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937254325.155, "dur": 0.610, + "args": { + "External id": 977731,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 14336], [], [], []], "Input Dims": [[1, 14336, 32768], [], [], []], "Ev Idx": 322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937254326.855, "dur": 2.150, + "args": { + "External id": 977732,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937254327.735, "dur": 1.135, + "args": { + "External id": 977733,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345937254331.374, "dur": 99.082, + "args": { + "External id": 977734,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1, 16384], [469762048, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336]], "Ev Idx": 325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937254432.468, "dur": 2.455, + "args": { + "External id": 977735,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[1, 4096, 14336], []], "Ev Idx": 326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338710, "tid": 2379450, + "ts": 6345937254438.128, "dur": 2.791, + "args": { + "External id": 977736,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 14336, 1], []], "Input Dims": [[4096, 1, 14336], []], "Ev Idx": 327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937254439.637, "dur": 0.677, + "args": { + "External id": 977737,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336, 1]", "[14336, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 14336, 1], [], [], []], "Input Dims": [[4096, 1, 14336], [], [], []], "Ev Idx": 328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937254443.274, "dur": 1.022, + "args": { + "External id": 977738,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 14336], []], "Input Dims": [[4096, 14336, 1], []], "Ev Idx": 329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937254508.885, "dur": 12.959, + "args": { + "External id": 977739,"Record function id": 0, "Ev Idx": 330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937254511.518, "dur": 9.289, + "args": { + "External id": 977740,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937254515.799, "dur": 4.025, + "args": { + "External id": 977741,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937254517.082, "dur": 2.596, + "args": { + "External id": 977742,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937254526.729, "dur": 11.262, + "args": { + "External id": 977743,"Record function id": 0, "Sequence number": 10552432, "Fwd thread id": 1, "Ev Idx": 334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937254527.803, "dur": 7.396, + "args": { + "External id": 977744,"Sequence number": 10552432, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 335 + } + }, + { + "ph": "f", "id": 34, "pid": 2338710, "tid": 2379450, "ts": 6345937254527.803, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937254532.581, "dur": 2.364, + "args": { + "External id": 977745,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937254533.599, "dur": 1.191, + "args": { + "External id": 977746,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937254542.408, "dur": 166.002, + "args": { + "External id": 977747,"Record function id": 0, "Sequence number": 10552431, "Fwd thread id": 1, "Ev Idx": 338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937254543.269, "dur": 155.660, + "args": { + "External id": 977748,"Sequence number": 10552431, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 339 + } + }, + { + "ph": "f", "id": 35, "pid": 2338710, "tid": 2379450, "ts": 6345937254543.269, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937254550.694, "dur": 5.864, + "args": { + "External id": 977749,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937254552.386, "dur": 3.463, + "args": { + "External id": 977750,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937254554.975, "dur": 0.677, + "args": { + "External id": 977751,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937254557.979, "dur": 63.280, + "args": { + "External id": 977752,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937254622.865, "dur": 8.102, + "args": { + "External id": 977753,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937254624.090, "dur": 6.075, + "args": { + "External id": 977754,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937254628.904, "dur": 1.008, + "args": { + "External id": 977755,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937254633.027, "dur": 7.932, + "args": { + "External id": 977756,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937254633.974, "dur": 6.451, + "args": { + "External id": 977757,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937254637.224, "dur": 3.123, + "args": { + "External id": 977758,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937254641.774, "dur": 56.176, + "args": { + "External id": 977759,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937254715.515, "dur": 7.806, + "args": { + "External id": 977760,"Record function id": 0, "Sequence number": 10552430, "Fwd thread id": 1, "Ev Idx": 351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937254716.831, "dur": 4.182, + "args": { + "External id": 977761,"Sequence number": 10552430, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 352 + } + }, + { + "ph": "f", "id": 36, "pid": 2338710, "tid": 2379450, "ts": 6345937254716.831, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937254718.822, "dur": 2.013, + "args": { + "External id": 977762,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937254719.512, "dur": 1.197, + "args": { + "External id": 977763,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937254727.479, "dur": 14.317, + "args": { + "External id": 977764,"Record function id": 0, "Sequence number": 10552429, "Fwd thread id": 1, "Ev Idx": 355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937254731.514, "dur": 7.263, + "args": { + "External id": 977765,"Sequence number": 10552429, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 356 + } + }, + { + "ph": "f", "id": 37, "pid": 2338710, "tid": 2379450, "ts": 6345937254731.514, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937254732.424, "dur": 6.102, + "args": { + "External id": 977766,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937254733.370, "dur": 4.559, + "args": { + "External id": 977767,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937254737.021, "dur": 0.782, + "args": { + "External id": 977768,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937254746.345, "dur": 5.722, + "args": { + "External id": 977769,"Record function id": 0, "Ev Idx": 360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937254747.648, "dur": 3.864, + "args": { + "External id": 977770,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937254748.950, "dur": 2.284, + "args": { + "External id": 977771,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937254749.574, "dur": 1.553, + "args": { + "External id": 977772,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937254758.244, "dur": 9.446, + "args": { + "External id": 977773,"Record function id": 0, "Sequence number": 10552428, "Fwd thread id": 1, "Ev Idx": 364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937254759.017, "dur": 5.857, + "args": { + "External id": 977774,"Sequence number": 10552428, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 365 + } + }, + { + "ph": "f", "id": 38, "pid": 2338710, "tid": 2379450, "ts": 6345937254759.017, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937254760.122, "dur": 4.599, + "args": { + "External id": 977775,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937254763.721, "dur": 0.893, + "args": { + "External id": 977776,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937254771.524, "dur": 118.320, + "args": { + "External id": 977777,"Record function id": 0, "Sequence number": 10552427, "Fwd thread id": 1, "Ev Idx": 368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937254772.385, "dur": 109.095, + "args": { + "External id": 977778,"Sequence number": 10552427, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 369 + } + }, + { + "ph": "f", "id": 39, "pid": 2338710, "tid": 2379450, "ts": 6345937254772.385, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937254774.314, "dur": 2.750, + "args": { + "External id": 977779,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937254775.099, "dur": 1.500, + "args": { + "External id": 977780,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937254775.885, "dur": 0.593, + "args": { + "External id": 977781,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937254780.624, "dur": 41.927, + "args": { + "External id": 977782,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937254824.044, "dur": 9.035, + "args": { + "External id": 977783,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937254824.853, "dur": 7.446, + "args": { + "External id": 977784,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937254828.994, "dur": 3.150, + "args": { + "External id": 977785,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937254834.514, "dur": 6.280, + "args": { + "External id": 977786,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937254835.519, "dur": 4.572, + "args": { + "External id": 977787,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937254839.530, "dur": 0.482, + "args": { + "External id": 977788,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937254841.610, "dur": 39.044, + "args": { + "External id": 977789,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937254896.826, "dur": 44.980, + "args": { + "External id": 977790,"Record function id": 0, "Sequence number": 10552426, "Fwd thread id": 1, "Ev Idx": 381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937254898.566, "dur": 6.897, + "args": { + "External id": 977791,"Sequence number": 10552426, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 382 + } + }, + { + "ph": "f", "id": 40, "pid": 2338710, "tid": 2379450, "ts": 6345937254898.566, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937254899.986, "dur": 5.313, + "args": { + "External id": 977792,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937254903.586, "dur": 1.573, + "args": { + "External id": 977793,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338710, "tid": 2379450, + "ts": 6345937254909.248, "dur": 29.003, + "args": { + "External id": 977794,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937254946.310, "dur": 11.715, + "args": { + "External id": 977795,"Record function id": 0, "Sequence number": 10552425, "Fwd thread id": 1, "Ev Idx": 386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937254947.724, "dur": 7.756, + "args": { + "External id": 977796,"Sequence number": 10552425, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 387 + } + }, + { + "ph": "f", "id": 41, "pid": 2338710, "tid": 2379450, "ts": 6345937254947.724, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937254951.754, "dur": 3.500, + "args": { + "External id": 977797,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937254952.724, "dur": 1.928, + "args": { + "External id": 977798,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937254954.051, "dur": 0.432, + "args": { + "External id": 977799,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937254962.851, "dur": 5.731, + "args": { + "External id": 977800,"Record function id": 0, "Ev Idx": 391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937254964.144, "dur": 3.847, + "args": { + "External id": 977801,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937254965.280, "dur": 2.350, + "args": { + "External id": 977802,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937254966.159, "dur": 1.353, + "args": { + "External id": 977803,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937254973.559, "dur": 587.003, + "args": { + "External id": 977804,"Record function id": 0, "Sequence number": 10552424, "Fwd thread id": 1, "Ev Idx": 395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937254974.968, "dur": 569.109, + "args": { + "External id": 977805,"Sequence number": 10552424, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 396 + } + }, + { + "ph": "f", "id": 42, "pid": 2338710, "tid": 2379450, "ts": 6345937254974.968, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338710, "tid": 2379450, + "ts": 6345937255004.187, "dur": 105.303, + "args": { + "External id": 977806,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338710, "tid": 2379450, + "ts": 6345937255006.035, "dur": 103.149, + "args": { + "External id": 977807,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2379450, + "ts": 6345937255030.053, "dur": 10.309, + "args": { + "External id": 977808,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937255034.170, "dur": 5.279, + "args": { + "External id": 977809,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937255042.190, "dur": 66.083, + "args": { + "External id": 977810,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937255128.720, "dur": 6.352, + "args": { + "External id": 977811,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937255129.800, "dur": 5.150, + "args": { + "External id": 977812,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937255140.265, "dur": 1.555, + "args": { + "External id": 977813,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937255140.685, "dur": 1.027, + "args": { + "External id": 977814,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937255163.753, "dur": 5.513, + "args": { + "External id": 977815,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937255183.729, "dur": 2.885, + "args": { + "External id": 977816,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937255401.744, "dur": 4.408, + "args": { + "External id": 977817,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345937255411.653, "dur": 45.311, + "args": { + "External id": 977818,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937255427.374, "dur": 1.372, + "args": { + "External id": 977819,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345937255463.335, "dur": 37.624, + "args": { + "External id": 977820,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345937255465.796, "dur": 34.922, + "args": { + "External id": 977821,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937255470.643, "dur": 4.738, + "args": { + "External id": 977822,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937255479.804, "dur": 20.337, + "args": { + "External id": 977823,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2379450, + "ts": 6345937255506.319, "dur": 2.919, + "args": { + "External id": 977824,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937255507.845, "dur": 1.234, + "args": { + "External id": 977825,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937255517.074, "dur": 8.905, + "args": { + "External id": 977826,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937255521.325, "dur": 4.540, + "args": { + "External id": 977827,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937255528.206, "dur": 2.068, + "args": { + "External id": 977828,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937255529.253, "dur": 0.926, + "args": { + "External id": 977829,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937255578.728, "dur": 10.911, + "args": { + "External id": 977830,"Record function id": 0, "Ev Idx": 421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937255581.262, "dur": 7.487, + "args": { + "External id": 977831,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937255583.923, "dur": 3.634, + "args": { + "External id": 977832,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937255585.093, "dur": 2.303, + "args": { + "External id": 977833,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937255593.906, "dur": 11.395, + "args": { + "External id": 977834,"Record function id": 0, "Sequence number": 10552423, "Fwd thread id": 1, "Ev Idx": 425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937255594.999, "dur": 6.999, + "args": { + "External id": 977835,"Sequence number": 10552423, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 426 + } + }, + { + "ph": "f", "id": 43, "pid": 2338710, "tid": 2379450, "ts": 6345937255594.999, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937255599.359, "dur": 2.334, + "args": { + "External id": 977836,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937255600.319, "dur": 1.231, + "args": { + "External id": 977837,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937255612.499, "dur": 176.898, + "args": { + "External id": 977838,"Record function id": 0, "Sequence number": 10552422, "Fwd thread id": 1, "Ev Idx": 429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937255613.579, "dur": 171.216, + "args": { + "External id": 977839,"Sequence number": 10552422, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 430 + } + }, + { + "ph": "f", "id": 44, "pid": 2338710, "tid": 2379450, "ts": 6345937255613.579, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937255617.059, "dur": 5.596, + "args": { + "External id": 977840,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937255618.648, "dur": 3.207, + "args": { + "External id": 977841,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937255620.317, "dur": 1.336, + "args": { + "External id": 977842,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937255624.119, "dur": 92.257, + "args": { + "External id": 977843,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937255717.665, "dur": 9.675, + "args": { + "External id": 977844,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937255718.950, "dur": 7.428, + "args": { + "External id": 977845,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937255722.465, "dur": 3.668, + "args": { + "External id": 977846,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937255729.004, "dur": 5.598, + "args": { + "External id": 977847,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937255729.948, "dur": 4.065, + "args": { + "External id": 977848,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937255733.256, "dur": 0.664, + "args": { + "External id": 977849,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937255735.566, "dur": 48.343, + "args": { + "External id": 977850,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937255795.548, "dur": 13.239, + "args": { + "External id": 977851,"Record function id": 0, "Sequence number": 10552421, "Fwd thread id": 1, "Ev Idx": 442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937255796.580, "dur": 9.802, + "args": { + "External id": 977852,"Sequence number": 10552421, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 443 + } + }, + { + "ph": "f", "id": 45, "pid": 2338710, "tid": 2379450, "ts": 6345937255796.580, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937255798.171, "dur": 8.015, + "args": { + "External id": 977853,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937255802.190, "dur": 3.870, + "args": { + "External id": 977854,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937255813.094, "dur": 9.673, + "args": { + "External id": 977855,"Record function id": 0, "Sequence number": 10552420, "Fwd thread id": 1, "Ev Idx": 446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937255814.215, "dur": 6.763, + "args": { + "External id": 977856,"Sequence number": 10552420, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 447 + } + }, + { + "ph": "f", "id": 46, "pid": 2338710, "tid": 2379450, "ts": 6345937255814.215, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937255815.085, "dur": 5.653, + "args": { + "External id": 977857,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937255816.025, "dur": 4.120, + "args": { + "External id": 977858,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937255819.422, "dur": 0.584, + "args": { + "External id": 977859,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937255827.140, "dur": 5.893, + "args": { + "External id": 977860,"Record function id": 0, "Ev Idx": 451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937255828.660, "dur": 3.748, + "args": { + "External id": 977861,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937255829.873, "dur": 2.228, + "args": { + "External id": 977862,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937255830.853, "dur": 1.088, + "args": { + "External id": 977863,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937255837.027, "dur": 10.467, + "args": { + "External id": 977864,"Record function id": 0, "Sequence number": 10552419, "Fwd thread id": 1, "Ev Idx": 455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937255840.644, "dur": 4.375, + "args": { + "External id": 977865,"Sequence number": 10552419, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 456 + } + }, + { + "ph": "f", "id": 47, "pid": 2338710, "tid": 2379450, "ts": 6345937255840.644, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937255842.121, "dur": 2.716, + "args": { + "External id": 977866,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937255843.194, "dur": 1.493, + "args": { + "External id": 977867,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937255855.825, "dur": 504.279, + "args": { + "External id": 977868,"Record function id": 0, "Sequence number": 10552418, "Fwd thread id": 1, "Ev Idx": 459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937255857.127, "dur": 481.827, + "args": { + "External id": 977869,"Sequence number": 10552418, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 460 + } + }, + { + "ph": "f", "id": 48, "pid": 2338710, "tid": 2379450, "ts": 6345937255857.127, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2379450, + "ts": 6345937255874.025, "dur": 11.926, + "args": { + "External id": 977870,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937255880.913, "dur": 4.460, + "args": { + "External id": 977871,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2379450, + "ts": 6345937255888.366, "dur": 3.646, + "args": { + "External id": 977872,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937255889.400, "dur": 2.349, + "args": { + "External id": 977873,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2379450, + "ts": 6345937255893.527, "dur": 6.593, + "args": { + "External id": 977874,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937255897.747, "dur": 2.134, + "args": { + "External id": 977875,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345937255934.274, "dur": 372.509, + "args": { + "External id": 977876,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937256094.380, "dur": 7.737, + "args": { + "External id": 977877,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937256107.800, "dur": 5.074, + "args": { + "External id": 977878,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937256116.274, "dur": 1.742, + "args": { + "External id": 977879,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937256119.021, "dur": 1.866, + "args": { + "External id": 977880,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937256184.606, "dur": 3.183, + "args": { + "External id": 977881,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937256185.711, "dur": 1.910, + "args": { + "External id": 977882,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345937256190.761, "dur": 34.396, + "args": { + "External id": 977883,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937256196.010, "dur": 3.004, + "args": { + "External id": 977884,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937256226.752, "dur": 1.557, + "args": { + "External id": 977885,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937256227.566, "dur": 0.671, + "args": { + "External id": 977886,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345937256233.566, "dur": 19.240, + "args": { + "External id": 977887,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937256237.349, "dur": 0.686, + "args": { + "External id": 977888,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338710, "tid": 2379450, + "ts": 6345937256323.768, "dur": 4.345, + "args": { + "External id": 977889,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338710, "tid": 2379450, + "ts": 6345937256331.614, "dur": 0.726, + "args": { + "External id": 977890,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338710, "tid": 2379450, + "ts": 6345937256334.584, "dur": 0.776, + "args": { + "External id": 977891,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937256374.600, "dur": 279.862, + "args": { + "External id": 977892,"Record function id": 0, "Sequence number": 10552417, "Fwd thread id": 1, "Ev Idx": 483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937256376.502, "dur": 270.302, + "args": { + "External id": 977893,"Sequence number": 10552417, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 484 + } + }, + { + "ph": "f", "id": 49, "pid": 2338710, "tid": 2379450, "ts": 6345937256376.502, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338710, "tid": 2379450, + "ts": 6345937256402.448, "dur": 59.326, + "args": { + "External id": 977894,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937256406.244, "dur": 4.453, + "args": { + "External id": 977895,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937256412.558, "dur": 48.568, + "args": { + "External id": 977896,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], [8, 4096, 8, 128], []], "Ev Idx": 487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2379450, + "ts": 6345937256473.449, "dur": 7.784, + "args": { + "External id": 977897,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937256478.306, "dur": 2.616, + "args": { + "External id": 977898,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937256662.982, "dur": 204.977, + "args": { + "External id": 977899,"Record function id": 0, "Sequence number": 10552416, "Fwd thread id": 1, "Ev Idx": 490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937256665.015, "dur": 195.818, + "args": { + "External id": 977900,"Sequence number": 10552416, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 491 + } + }, + { + "ph": "f", "id": 50, "pid": 2338710, "tid": 2379450, "ts": 6345937256665.015, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338710, "tid": 2379450, + "ts": 6345937256678.044, "dur": 55.098, + "args": { + "External id": 977901,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937256680.354, "dur": 3.381, + "args": { + "External id": 977902,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937256684.799, "dur": 47.578, + "args": { + "External id": 977903,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], []], "Ev Idx": 494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2379450, + "ts": 6345937256741.776, "dur": 10.462, + "args": { + "External id": 977904,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937256746.766, "dur": 5.130, + "args": { + "External id": 977905,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937256875.456, "dur": 16.146, + "args": { + "External id": 977906,"Record function id": 0, "Sequence number": 10552415, "Fwd thread id": 1, "Ev Idx": 497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937256879.710, "dur": 8.628, + "args": { + "External id": 977907,"Sequence number": 10552415, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 498 + } + }, + { + "ph": "f", "id": 51, "pid": 2338710, "tid": 2379450, "ts": 6345937256879.710, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937256882.441, "dur": 5.525, + "args": { + "External id": 977908,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937256883.695, "dur": 4.089, + "args": { + "External id": 977909,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937256895.781, "dur": 9.901, + "args": { + "External id": 977910,"Record function id": 0, "Sequence number": 10552414, "Fwd thread id": 1, "Ev Idx": 501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937256897.127, "dur": 6.123, + "args": { + "External id": 977911,"Sequence number": 10552414, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 502 + } + }, + { + "ph": "f", "id": 52, "pid": 2338710, "tid": 2379450, "ts": 6345937256897.127, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937256898.268, "dur": 4.796, + "args": { + "External id": 977912,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937256901.791, "dur": 1.099, + "args": { + "External id": 977913,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937256909.539, "dur": 8.733, + "args": { + "External id": 977914,"Record function id": 0, "Sequence number": 10552413, "Fwd thread id": 1, "Ev Idx": 505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937256910.731, "dur": 5.395, + "args": { + "External id": 977915,"Sequence number": 10552413, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 506 + } + }, + { + "ph": "f", "id": 53, "pid": 2338710, "tid": 2379450, "ts": 6345937256910.731, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937256911.875, "dur": 4.062, + "args": { + "External id": 977916,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937256914.758, "dur": 1.078, + "args": { + "External id": 977917,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937256922.051, "dur": 35.884, + "args": { + "External id": 977918,"Record function id": 0, "Sequence number": 10552412, "Fwd thread id": 1, "Ev Idx": 509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937256923.171, "dur": 32.560, + "args": { + "External id": 977919,"Sequence number": 10552412, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 510 + } + }, + { + "ph": "f", "id": 54, "pid": 2338710, "tid": 2379450, "ts": 6345937256923.171, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937256926.694, "dur": 28.836, + "args": { + "External id": 977920,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937256954.282, "dur": 1.068, + "args": { + "External id": 977921,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937256961.902, "dur": 268.504, + "args": { + "External id": 977922,"Record function id": 0, "Sequence number": 10552411, "Fwd thread id": 1, "Ev Idx": 513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937256962.708, "dur": 257.472, + "args": { + "External id": 977923,"Sequence number": 10552411, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 514 + } + }, + { + "ph": "f", "id": 55, "pid": 2338710, "tid": 2379450, "ts": 6345937256962.708, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937256969.442, "dur": 6.341, + "args": { + "External id": 977924,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937256971.515, "dur": 3.551, + "args": { + "External id": 977925,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937256973.328, "dur": 1.451, + "args": { + "External id": 977926,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937256977.588, "dur": 151.076, + "args": { + "External id": 977927,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937257132.174, "dur": 13.024, + "args": { + "External id": 977928,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937257133.691, "dur": 10.252, + "args": { + "External id": 977929,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937257139.437, "dur": 4.267, + "args": { + "External id": 977930,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937257147.972, "dur": 8.643, + "args": { + "External id": 977931,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937257149.051, "dur": 6.956, + "args": { + "External id": 977932,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937257152.669, "dur": 3.247, + "args": { + "External id": 977933,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937257157.588, "dur": 61.504, + "args": { + "External id": 977934,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937257240.550, "dur": 12.846, + "args": { + "External id": 977935,"Record function id": 0, "Sequence number": 10552410, "Fwd thread id": 1, "Ev Idx": 526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937257242.045, "dur": 8.435, + "args": { + "External id": 977936,"Sequence number": 10552410, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 527 + } + }, + { + "ph": "f", "id": 56, "pid": 2338710, "tid": 2379450, "ts": 6345937257242.045, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937257244.116, "dur": 6.153, + "args": { + "External id": 977937,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937257247.918, "dur": 2.242, + "args": { + "External id": 977938,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937257257.494, "dur": 10.744, + "args": { + "External id": 977939,"Record function id": 0, "Sequence number": 10552409, "Fwd thread id": 1, "Ev Idx": 530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937257258.342, "dur": 7.649, + "args": { + "External id": 977940,"Sequence number": 10552409, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 531 + } + }, + { + "ph": "f", "id": 57, "pid": 2338710, "tid": 2379450, "ts": 6345937257258.342, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937257259.264, "dur": 6.494, + "args": { + "External id": 977941,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937257260.363, "dur": 4.757, + "args": { + "External id": 977942,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937257264.224, "dur": 0.791, + "args": { + "External id": 977943,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937257275.304, "dur": 13.609, + "args": { + "External id": 977944,"Record function id": 0, "Ev Idx": 535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937257276.953, "dur": 11.053, + "args": { + "External id": 977945,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937257280.068, "dur": 7.496, + "args": { + "External id": 977946,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937257284.473, "dur": 2.951, + "args": { + "External id": 977947,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937257294.815, "dur": 7.359, + "args": { + "External id": 977948,"Record function id": 0, "Sequence number": 10552408, "Fwd thread id": 1, "Ev Idx": 539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937257296.168, "dur": 3.638, + "args": { + "External id": 977949,"Sequence number": 10552408, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 540 + } + }, + { + "ph": "f", "id": 58, "pid": 2338710, "tid": 2379450, "ts": 6345937257296.168, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937257297.891, "dur": 1.754, + "args": { + "External id": 977950,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937257298.627, "dur": 0.860, + "args": { + "External id": 977951,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937257306.089, "dur": 109.297, + "args": { + "External id": 977952,"Record function id": 0, "Sequence number": 10552407, "Fwd thread id": 1, "Ev Idx": 543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937257307.119, "dur": 101.529, + "args": { + "External id": 977953,"Sequence number": 10552407, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 544 + } + }, + { + "ph": "f", "id": 59, "pid": 2338710, "tid": 2379450, "ts": 6345937257307.119, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937257310.024, "dur": 5.238, + "args": { + "External id": 977954,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937257310.540, "dur": 4.226, + "args": { + "External id": 977955,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937257314.155, "dur": 0.496, + "args": { + "External id": 977956,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937257316.197, "dur": 32.807, + "args": { + "External id": 977957,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937257350.451, "dur": 8.203, + "args": { + "External id": 977958,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937257350.998, "dur": 6.907, + "args": { + "External id": 977959,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937257354.280, "dur": 3.501, + "args": { + "External id": 977960,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937257360.114, "dur": 5.218, + "args": { + "External id": 977961,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937257363.674, "dur": 1.189, + "args": { + "External id": 977962,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937257364.381, "dur": 0.382, + "args": { + "External id": 977963,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937257365.945, "dur": 41.686, + "args": { + "External id": 977964,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937257421.627, "dur": 40.075, + "args": { + "External id": 977965,"Record function id": 0, "Sequence number": 10552406, "Fwd thread id": 1, "Ev Idx": 556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937257422.679, "dur": 6.664, + "args": { + "External id": 977966,"Sequence number": 10552406, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 557 + } + }, + { + "ph": "f", "id": 60, "pid": 2338710, "tid": 2379450, "ts": 6345937257422.679, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937257424.391, "dur": 4.761, + "args": { + "External id": 977967,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937257427.601, "dur": 1.445, + "args": { + "External id": 977968,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338710, "tid": 2379450, + "ts": 6345937257433.121, "dur": 25.719, + "args": { + "External id": 977969,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937257466.572, "dur": 10.581, + "args": { + "External id": 977970,"Record function id": 0, "Sequence number": 10552405, "Fwd thread id": 1, "Ev Idx": 561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937257470.776, "dur": 4.492, + "args": { + "External id": 977971,"Sequence number": 10552405, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 562 + } + }, + { + "ph": "f", "id": 61, "pid": 2338710, "tid": 2379450, "ts": 6345937257470.776, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937257471.686, "dur": 3.326, + "args": { + "External id": 977972,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937257472.612, "dur": 1.847, + "args": { + "External id": 977973,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937257473.718, "dur": 0.620, + "args": { + "External id": 977974,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937257481.825, "dur": 5.267, + "args": { + "External id": 977975,"Record function id": 0, "Ev Idx": 566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937257483.098, "dur": 3.360, + "args": { + "External id": 977976,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937257484.279, "dur": 1.766, + "args": { + "External id": 977977,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937257484.920, "dur": 1.016, + "args": { + "External id": 977978,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937257490.956, "dur": 12.199, + "args": { + "External id": 977979,"Record function id": 0, "Sequence number": 10552404, "Fwd thread id": 1, "Ev Idx": 570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937257492.257, "dur": 8.917, + "args": { + "External id": 977980,"Sequence number": 10552404, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 571 + } + }, + { + "ph": "f", "id": 62, "pid": 2338710, "tid": 2379450, "ts": 6345937257492.257, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937257496.237, "dur": 4.761, + "args": { + "External id": 977981,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937257499.937, "dur": 0.930, + "args": { + "External id": 977982,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937257506.825, "dur": 118.185, + "args": { + "External id": 977983,"Record function id": 0, "Sequence number": 10552403, "Fwd thread id": 1, "Ev Idx": 574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937257507.533, "dur": 108.912, + "args": { + "External id": 977984,"Sequence number": 10552403, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 575 + } + }, + { + "ph": "f", "id": 63, "pid": 2338710, "tid": 2379450, "ts": 6345937257507.533, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937257509.820, "dur": 5.393, + "args": { + "External id": 977985,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937257510.610, "dur": 4.087, + "args": { + "External id": 977986,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937257511.510, "dur": 3.059, + "args": { + "External id": 977987,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937257522.091, "dur": 38.454, + "args": { + "External id": 977988,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937257562.005, "dur": 5.810, + "args": { + "External id": 977989,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937257562.698, "dur": 4.403, + "args": { + "External id": 977990,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937257563.886, "dur": 3.061, + "args": { + "External id": 977991,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937257568.985, "dur": 6.136, + "args": { + "External id": 977992,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937257570.225, "dur": 4.234, + "args": { + "External id": 977993,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937257573.692, "dur": 0.691, + "args": { + "External id": 977994,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937257578.693, "dur": 36.800, + "args": { + "External id": 977995,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937257630.690, "dur": 26.725, + "args": { + "External id": 977996,"Record function id": 0, "Sequence number": 10552402, "Fwd thread id": 1, "Ev Idx": 587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937257631.708, "dur": 3.681, + "args": { + "External id": 977997,"Sequence number": 10552402, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 588 + } + }, + { + "ph": "f", "id": 64, "pid": 2338710, "tid": 2379450, "ts": 6345937257631.708, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937257633.005, "dur": 2.215, + "args": { + "External id": 977998,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937257633.812, "dur": 1.268, + "args": { + "External id": 977999,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345937257638.164, "dur": 16.836, + "args": { + "External id": 978000,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937257661.932, "dur": 13.021, + "args": { + "External id": 978001,"Record function id": 0, "Sequence number": 10552401, "Fwd thread id": 1, "Ev Idx": 592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937257663.038, "dur": 9.758, + "args": { + "External id": 978002,"Sequence number": 10552401, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 593 + } + }, + { + "ph": "f", "id": 65, "pid": 2338710, "tid": 2379450, "ts": 6345937257663.038, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937257663.859, "dur": 8.676, + "args": { + "External id": 978003,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937257664.611, "dur": 7.329, + "args": { + "External id": 978004,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937257671.040, "dur": 0.773, + "args": { + "External id": 978005,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937257679.623, "dur": 7.351, + "args": { + "External id": 978006,"Record function id": 0, "Ev Idx": 597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937257680.800, "dur": 5.626, + "args": { + "External id": 978007,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937257682.055, "dur": 4.015, + "args": { + "External id": 978008,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937257682.708, "dur": 3.273, + "args": { + "External id": 978009,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937257691.939, "dur": 504.273, + "args": { + "External id": 978010,"Record function id": 0, "Sequence number": 10552400, "Fwd thread id": 1, "Ev Idx": 601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937257693.113, "dur": 460.770, + "args": { + "External id": 978011,"Sequence number": 10552400, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 602 + } + }, + { + "ph": "f", "id": 66, "pid": 2338710, "tid": 2379450, "ts": 6345937257693.113, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937257734.945, "dur": 2.066, + "args": { + "External id": 978012,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937257735.553, "dur": 1.287, + "args": { + "External id": 978013,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937257756.871, "dur": 4.871, + "args": { + "External id": 978014,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937257773.019, "dur": 2.040, + "args": { + "External id": 978015,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937257966.873, "dur": 2.100, + "args": { + "External id": 978016,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345937257973.552, "dur": 56.975, + "args": { + "External id": 978017,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937257983.747, "dur": 0.779, + "args": { + "External id": 978018,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345937258039.373, "dur": 79.389, + "args": { + "External id": 978019,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345937258041.454, "dur": 76.957, + "args": { + "External id": 978020,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937258049.247, "dur": 42.766, + "args": { + "External id": 978021,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937258096.701, "dur": 21.088, + "args": { + "External id": 978022,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2379450, + "ts": 6345937258127.951, "dur": 3.598, + "args": { + "External id": 978023,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937258129.449, "dur": 1.981, + "args": { + "External id": 978024,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937258139.746, "dur": 4.884, + "args": { + "External id": 978025,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937258140.841, "dur": 3.690, + "args": { + "External id": 978026,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338710, "tid": 2379450, + "ts": 6345937258168.127, "dur": 20.757, + "args": { + "External id": 978027,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937258211.859, "dur": 13.096, + "args": { + "External id": 978028,"Record function id": 0, "Ev Idx": 619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937258216.961, "dur": 7.022, + "args": { + "External id": 978029,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937258219.435, "dur": 3.466, + "args": { + "External id": 978030,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937258220.589, "dur": 2.174, + "args": { + "External id": 978031,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937258232.250, "dur": 6.564, + "args": { + "External id": 978032,"Record function id": 0, "Sequence number": 10552399, "Fwd thread id": 1, "Ev Idx": 623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937258233.684, "dur": 1.481, + "args": { + "External id": 978033,"Sequence number": 10552399, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 624 + } + }, + { + "ph": "f", "id": 67, "pid": 2338710, "tid": 2379450, "ts": 6345937258233.684, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937258243.495, "dur": 474.714, + "args": { + "External id": 978034,"Record function id": 0, "Sequence number": 10552398, "Fwd thread id": 1, "Ev Idx": 625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937258244.679, "dur": 462.320, + "args": { + "External id": 978035,"Sequence number": 10552398, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 626 + } + }, + { + "ph": "f", "id": 68, "pid": 2338710, "tid": 2379450, "ts": 6345937258244.679, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937258282.006, "dur": 8.955, + "args": { + "External id": 978036,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338710, "tid": 2379450, + "ts": 6345937258287.067, "dur": 3.526, + "args": { + "External id": 978037,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]", "[16384, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[67108864, 16384, 1], [], []], "Input Dims": [[8, 4096, 4096], [], []], "Ev Idx": 628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937258294.949, "dur": 8.278, + "args": { + "External id": 978038,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937258296.786, "dur": 5.762, + "args": { + "External id": 978039,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937258301.537, "dur": 0.846, + "args": { + "External id": 978040,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2379450, + "ts": 6345937258309.609, "dur": 102.797, + "args": { + "External id": 978041,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16384, 1], [1, 14336], []], "Input Dims": [[32768, 4096], [14336, 4096], []], "Ev Idx": 632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937258310.721, "dur": 3.099, + "args": { + "External id": 978042,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 14336]], "Input Dims": [[14336, 4096]], "Ev Idx": 633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937258311.570, "dur": 1.592, + "args": { + "External id": 978043,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 14336], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937258312.586, "dur": 0.463, + "args": { + "External id": 978044,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[14336, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 14336], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2379450, + "ts": 6345937258315.173, "dur": 96.508, + "args": { + "External id": 978045,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937258317.163, "dur": 93.381, + "args": { + "External id": 978046,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2379450, + "ts": 6345937258416.666, "dur": 6.396, + "args": { + "External id": 978047,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [58720256, 14336, 1]], "Input Dims": [[32768, 14336], [8, 4096, 14336]], "Ev Idx": 638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937258421.273, "dur": 1.616, + "args": { + "External id": 978048,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937258460.198, "dur": 8.180, + "args": { + "External id": 978049,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937258471.815, "dur": 2.153, + "args": { + "External id": 978050,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937258474.762, "dur": 1.842, + "args": { + "External id": 978051,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937258512.520, "dur": 2.240, + "args": { + "External id": 978052,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937258513.314, "dur": 1.277, + "args": { + "External id": 978053,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 2338710, "tid": 2379450, + "ts": 6345937258540.773, "dur": 143.969, + "args": { + "External id": 978054,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[16384, 1], [14336, 1]], []], "Input Dims": [[], [[32768, 4096], [32768, 14336]], []], "Ev Idx": 645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2379450, + "ts": 6345937258546.488, "dur": 10.300, + "args": { + "External id": 978055,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937258552.001, "dur": 3.588, + "args": { + "External id": 978056,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096, 1]", "[16384, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338710, "tid": 2379450, + "ts": 6345937258558.245, "dur": 7.310, + "args": { + "External id": 978057,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16384, 1, 1], []], "Input Dims": [[32768, 4096, 1], []], "Ev Idx": 648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937258564.036, "dur": 0.697, + "args": { + "External id": 978058,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 32768]", "[1, 1, 16384]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1, 1], [], [], []], "Input Dims": [[32768, 4096, 1], [], [], []], "Ev Idx": 649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2379450, + "ts": 6345937258566.961, "dur": 2.528, + "args": { + "External id": 978059,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937258568.355, "dur": 0.737, + "args": { + "External id": 978060,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338710, "tid": 2379450, + "ts": 6345937258572.468, "dur": 2.540, + "args": { + "External id": 978061,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937258573.708, "dur": 0.775, + "args": { + "External id": 978062,"Record function id": 0, "Concrete Inputs": ["", "[1, 14336, 32768]", "[1, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1, 1], [], [], []], "Input Dims": [[32768, 14336, 1], [], [], []], "Ev Idx": 653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338710, "tid": 2379450, + "ts": 6345937258581.078, "dur": 4.437, + "args": { + "External id": 978063,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 16384], []], "Input Dims": [[4096, 1, 32768], []], "Ev Idx": 654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937258582.422, "dur": 2.702, + "args": { + "External id": 978064,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768, 1]", "[1, 16384, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 16384], [], [], []], "Input Dims": [[4096, 1, 32768], [], [], []], "Ev Idx": 655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937258588.618, "dur": 8.170, + "args": { + "External id": 978065,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 16384, 1], []], "Input Dims": [[4096, 32768, 1], []], "Ev Idx": 656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338710, "tid": 2379450, + "ts": 6345937258594.357, "dur": 2.251, + "args": { + "External id": 978066,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]", "[4096, 1, 16384]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 16384, 1], [], []], "Input Dims": [[4096, 32768, 1], [], []], "Ev Idx": 657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338710, "tid": 2379450, + "ts": 6345937258597.698, "dur": 3.598, + "args": { + "External id": 978067,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 14336], []], "Input Dims": [[1, 14336, 32768], []], "Ev Idx": 658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937258600.591, "dur": 0.385, + "args": { + "External id": 978068,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 14336], [], [], []], "Input Dims": [[1, 14336, 32768], [], [], []], "Ev Idx": 659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937258601.891, "dur": 4.527, + "args": { + "External id": 978069,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937258602.588, "dur": 3.719, + "args": { + "External id": 978070,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345937258608.132, "dur": 60.414, + "args": { + "External id": 978071,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1, 16384], [469762048, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336]], "Ev Idx": 662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937258672.556, "dur": 1.493, + "args": { + "External id": 978072,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[1, 4096, 14336], []], "Ev Idx": 663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338710, "tid": 2379450, + "ts": 6345937258674.670, "dur": 5.056, + "args": { + "External id": 978073,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 14336, 1], []], "Input Dims": [[4096, 1, 14336], []], "Ev Idx": 664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937258678.602, "dur": 0.495, + "args": { + "External id": 978074,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336, 1]", "[14336, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 14336, 1], [], [], []], "Input Dims": [[4096, 1, 14336], [], [], []], "Ev Idx": 665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937258682.098, "dur": 1.353, + "args": { + "External id": 978075,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 14336], []], "Input Dims": [[4096, 14336, 1], []], "Ev Idx": 666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937258728.404, "dur": 11.482, + "args": { + "External id": 978076,"Record function id": 0, "Ev Idx": 667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937258730.444, "dur": 8.456, + "args": { + "External id": 978077,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937258732.451, "dur": 5.341, + "args": { + "External id": 978078,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937258733.447, "dur": 4.219, + "args": { + "External id": 978079,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937258744.065, "dur": 7.396, + "args": { + "External id": 978080,"Record function id": 0, "Sequence number": 10552397, "Fwd thread id": 1, "Ev Idx": 671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937258745.211, "dur": 3.872, + "args": { + "External id": 978081,"Sequence number": 10552397, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 672 + } + }, + { + "ph": "f", "id": 69, "pid": 2338710, "tid": 2379450, "ts": 6345937258745.211, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937258747.012, "dur": 1.890, + "args": { + "External id": 978082,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937258747.627, "dur": 1.140, + "args": { + "External id": 978083,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937258755.831, "dur": 123.275, + "args": { + "External id": 978084,"Record function id": 0, "Sequence number": 10552396, "Fwd thread id": 1, "Ev Idx": 675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937258759.437, "dur": 113.604, + "args": { + "External id": 978085,"Sequence number": 10552396, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 676 + } + }, + { + "ph": "f", "id": 70, "pid": 2338710, "tid": 2379450, "ts": 6345937258759.437, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937258763.154, "dur": 4.206, + "args": { + "External id": 978086,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937258764.579, "dur": 2.206, + "args": { + "External id": 978087,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937258765.946, "dur": 0.617, + "args": { + "External id": 978088,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937258768.646, "dur": 43.563, + "args": { + "External id": 978089,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937258813.693, "dur": 8.646, + "args": { + "External id": 978090,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937258814.393, "dur": 7.125, + "args": { + "External id": 978091,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937258820.253, "dur": 1.048, + "args": { + "External id": 978092,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937258823.963, "dur": 6.944, + "args": { + "External id": 978093,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937258828.970, "dur": 1.349, + "args": { + "External id": 978094,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937258829.704, "dur": 0.536, + "args": { + "External id": 978095,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937258831.495, "dur": 40.538, + "args": { + "External id": 978096,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937258885.044, "dur": 11.340, + "args": { + "External id": 978097,"Record function id": 0, "Sequence number": 10552395, "Fwd thread id": 1, "Ev Idx": 688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937258886.183, "dur": 8.428, + "args": { + "External id": 978098,"Sequence number": 10552395, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 689 + } + }, + { + "ph": "f", "id": 71, "pid": 2338710, "tid": 2379450, "ts": 6345937258886.183, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937258890.076, "dur": 4.369, + "args": { + "External id": 978099,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937258893.136, "dur": 1.187, + "args": { + "External id": 978100,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937258900.303, "dur": 10.017, + "args": { + "External id": 978101,"Record function id": 0, "Sequence number": 10552394, "Fwd thread id": 1, "Ev Idx": 692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937258901.273, "dur": 7.134, + "args": { + "External id": 978102,"Sequence number": 10552394, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 693 + } + }, + { + "ph": "f", "id": 72, "pid": 2338710, "tid": 2379450, "ts": 6345937258901.273, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937258902.638, "dur": 5.514, + "args": { + "External id": 978103,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937258903.409, "dur": 4.215, + "args": { + "External id": 978104,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937258904.791, "dur": 2.727, + "args": { + "External id": 978105,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937258914.789, "dur": 7.480, + "args": { + "External id": 978106,"Record function id": 0, "Ev Idx": 697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937258915.909, "dur": 5.782, + "args": { + "External id": 978107,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937258917.111, "dur": 4.350, + "args": { + "External id": 978108,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937258920.190, "dur": 1.085, + "args": { + "External id": 978109,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937258926.143, "dur": 8.163, + "args": { + "External id": 978110,"Record function id": 0, "Sequence number": 10552393, "Fwd thread id": 1, "Ev Idx": 701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937258926.970, "dur": 5.037, + "args": { + "External id": 978111,"Sequence number": 10552393, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 702 + } + }, + { + "ph": "f", "id": 73, "pid": 2338710, "tid": 2379450, "ts": 6345937258926.970, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937258928.068, "dur": 3.766, + "args": { + "External id": 978112,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937258931.049, "dur": 0.669, + "args": { + "External id": 978113,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937258937.856, "dur": 173.285, + "args": { + "External id": 978114,"Record function id": 0, "Sequence number": 10552392, "Fwd thread id": 1, "Ev Idx": 705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937258938.563, "dur": 161.842, + "args": { + "External id": 978115,"Sequence number": 10552392, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 706 + } + }, + { + "ph": "f", "id": 74, "pid": 2338710, "tid": 2379450, "ts": 6345937258938.563, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937258941.031, "dur": 5.227, + "args": { + "External id": 978116,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937258941.795, "dur": 3.951, + "args": { + "External id": 978117,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937258945.245, "dur": 0.389, + "args": { + "External id": 978118,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937258947.026, "dur": 36.590, + "args": { + "External id": 978119,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937258985.090, "dur": 3.675, + "args": { + "External id": 978120,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937258985.609, "dur": 2.615, + "args": { + "External id": 978121,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937258987.076, "dur": 1.017, + "args": { + "External id": 978122,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937258990.098, "dur": 7.838, + "args": { + "External id": 978123,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937258993.698, "dur": 3.710, + "args": { + "External id": 978124,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937258996.836, "dur": 0.499, + "args": { + "External id": 978125,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937258998.730, "dur": 99.807, + "args": { + "External id": 978126,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937259120.389, "dur": 37.841, + "args": { + "External id": 978127,"Record function id": 0, "Sequence number": 10552391, "Fwd thread id": 1, "Ev Idx": 718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937259121.729, "dur": 6.971, + "args": { + "External id": 978128,"Sequence number": 10552391, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 719 + } + }, + { + "ph": "f", "id": 75, "pid": 2338710, "tid": 2379450, "ts": 6345937259121.729, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937259123.723, "dur": 4.790, + "args": { + "External id": 978129,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937259124.467, "dur": 3.933, + "args": { + "External id": 978130,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338710, "tid": 2379450, + "ts": 6345937259132.054, "dur": 22.680, + "args": { + "External id": 978131,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937259162.740, "dur": 11.708, + "args": { + "External id": 978132,"Record function id": 0, "Sequence number": 10552390, "Fwd thread id": 1, "Ev Idx": 723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937259164.111, "dur": 7.832, + "args": { + "External id": 978133,"Sequence number": 10552390, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 724 + } + }, + { + "ph": "f", "id": 76, "pid": 2338710, "tid": 2379450, "ts": 6345937259164.111, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937259168.161, "dur": 3.530, + "args": { + "External id": 978134,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937259169.201, "dur": 1.928, + "args": { + "External id": 978135,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937259170.363, "dur": 0.633, + "args": { + "External id": 978136,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937259181.596, "dur": 6.638, + "args": { + "External id": 978137,"Record function id": 0, "Ev Idx": 728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937259183.370, "dur": 4.320, + "args": { + "External id": 978138,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937259184.980, "dur": 2.341, + "args": { + "External id": 978139,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937259185.558, "dur": 1.629, + "args": { + "External id": 978140,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937259194.734, "dur": 482.242, + "args": { + "External id": 978141,"Record function id": 0, "Sequence number": 10552389, "Fwd thread id": 1, "Ev Idx": 732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937259195.966, "dur": 444.341, + "args": { + "External id": 978142,"Sequence number": 10552389, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 733 + } + }, + { + "ph": "f", "id": 77, "pid": 2338710, "tid": 2379450, "ts": 6345937259195.966, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338710, "tid": 2379450, + "ts": 6345937259222.787, "dur": 39.165, + "args": { + "External id": 978143,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338710, "tid": 2379450, + "ts": 6345937259224.405, "dur": 37.317, + "args": { + "External id": 978144,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2379450, + "ts": 6345937259227.657, "dur": 7.033, + "args": { + "External id": 978145,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937259230.551, "dur": 3.425, + "args": { + "External id": 978146,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937259236.190, "dur": 25.002, + "args": { + "External id": 978147,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937259277.516, "dur": 4.734, + "args": { + "External id": 978148,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937259280.746, "dur": 1.365, + "args": { + "External id": 978149,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937259286.688, "dur": 1.911, + "args": { + "External id": 978150,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937259287.588, "dur": 0.857, + "args": { + "External id": 978151,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937259305.112, "dur": 2.961, + "args": { + "External id": 978152,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937259320.545, "dur": 4.532, + "args": { + "External id": 978153,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937259513.165, "dur": 3.659, + "args": { + "External id": 978154,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345937259522.034, "dur": 39.775, + "args": { + "External id": 978155,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937259536.511, "dur": 0.868, + "args": { + "External id": 978156,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345937259568.550, "dur": 33.111, + "args": { + "External id": 978157,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345937259570.757, "dur": 30.660, + "args": { + "External id": 978158,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937259575.293, "dur": 6.399, + "args": { + "External id": 978159,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937259585.644, "dur": 15.241, + "args": { + "External id": 978160,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2379450, + "ts": 6345937259607.001, "dur": 4.953, + "args": { + "External id": 978161,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937259610.296, "dur": 1.495, + "args": { + "External id": 978162,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937259619.745, "dur": 2.355, + "args": { + "External id": 978163,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937259620.512, "dur": 1.462, + "args": { + "External id": 978164,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937259624.525, "dur": 4.112, + "args": { + "External id": 978165,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937259627.314, "dur": 1.202, + "args": { + "External id": 978166,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345937259654.697, "dur": 20.471, + "args": { + "External id": 978167,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937259688.065, "dur": 8.351, + "args": { + "External id": 978168,"Record function id": 0, "Ev Idx": 759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937259690.267, "dur": 5.429, + "args": { + "External id": 978169,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937259692.156, "dur": 2.413, + "args": { + "External id": 978170,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937259693.126, "dur": 1.328, + "args": { + "External id": 978171,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937259700.437, "dur": 11.379, + "args": { + "External id": 978172,"Record function id": 0, "Sequence number": 10552388, "Fwd thread id": 1, "Ev Idx": 763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937259701.485, "dur": 6.827, + "args": { + "External id": 978173,"Sequence number": 10552388, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 764 + } + }, + { + "ph": "f", "id": 78, "pid": 2338710, "tid": 2379450, "ts": 6345937259701.485, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937259703.037, "dur": 5.059, + "args": { + "External id": 978174,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937259706.674, "dur": 1.282, + "args": { + "External id": 978175,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937259715.771, "dur": 160.122, + "args": { + "External id": 978176,"Record function id": 0, "Sequence number": 10552387, "Fwd thread id": 1, "Ev Idx": 767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937259716.755, "dur": 151.024, + "args": { + "External id": 978177,"Sequence number": 10552387, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 768 + } + }, + { + "ph": "f", "id": 79, "pid": 2338710, "tid": 2379450, "ts": 6345937259716.755, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937259719.877, "dur": 4.703, + "args": { + "External id": 978178,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937259721.330, "dur": 2.628, + "args": { + "External id": 978179,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937259722.806, "dur": 0.867, + "args": { + "External id": 978180,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937259725.901, "dur": 80.129, + "args": { + "External id": 978181,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937259807.390, "dur": 12.434, + "args": { + "External id": 978182,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937259811.739, "dur": 7.253, + "args": { + "External id": 978183,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937259815.256, "dur": 3.596, + "args": { + "External id": 978184,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937259821.404, "dur": 3.242, + "args": { + "External id": 978185,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937259822.633, "dur": 1.520, + "args": { + "External id": 978186,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937259823.453, "dur": 0.599, + "args": { + "External id": 978187,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937259825.247, "dur": 41.580, + "args": { + "External id": 978188,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937259881.859, "dur": 11.920, + "args": { + "External id": 978189,"Record function id": 0, "Sequence number": 10552386, "Fwd thread id": 1, "Ev Idx": 780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937259883.041, "dur": 8.378, + "args": { + "External id": 978190,"Sequence number": 10552386, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 781 + } + }, + { + "ph": "f", "id": 80, "pid": 2338710, "tid": 2379450, "ts": 6345937259883.041, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937259884.856, "dur": 6.365, + "args": { + "External id": 978191,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937259890.048, "dur": 1.064, + "args": { + "External id": 978192,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937259899.326, "dur": 8.040, + "args": { + "External id": 978193,"Record function id": 0, "Sequence number": 10552385, "Fwd thread id": 1, "Ev Idx": 784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937259900.689, "dur": 4.505, + "args": { + "External id": 978194,"Sequence number": 10552385, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 785 + } + }, + { + "ph": "f", "id": 81, "pid": 2338710, "tid": 2379450, "ts": 6345937259900.689, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937259901.606, "dur": 3.328, + "args": { + "External id": 978195,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937259902.401, "dur": 2.027, + "args": { + "External id": 978196,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937259903.592, "dur": 0.695, + "args": { + "External id": 978197,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937259912.106, "dur": 5.754, + "args": { + "External id": 978198,"Record function id": 0, "Ev Idx": 789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937259913.376, "dur": 3.893, + "args": { + "External id": 978199,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937259914.492, "dur": 2.538, + "args": { + "External id": 978200,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937259915.283, "dur": 1.632, + "args": { + "External id": 978201,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937259921.288, "dur": 9.639, + "args": { + "External id": 978202,"Record function id": 0, "Sequence number": 10552384, "Fwd thread id": 1, "Ev Idx": 793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937259924.755, "dur": 3.753, + "args": { + "External id": 978203,"Sequence number": 10552384, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 794 + } + }, + { + "ph": "f", "id": 82, "pid": 2338710, "tid": 2379450, "ts": 6345937259924.755, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937259926.130, "dur": 2.202, + "args": { + "External id": 978204,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937259926.719, "dur": 1.500, + "args": { + "External id": 978205,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937259935.521, "dur": 480.212, + "args": { + "External id": 978206,"Record function id": 0, "Sequence number": 10552383, "Fwd thread id": 1, "Ev Idx": 797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937259938.717, "dur": 455.232, + "args": { + "External id": 978207,"Sequence number": 10552383, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 798 + } + }, + { + "ph": "f", "id": 83, "pid": 2338710, "tid": 2379450, "ts": 6345937259938.717, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2379450, + "ts": 6345937259955.127, "dur": 6.584, + "args": { + "External id": 978208,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937259957.157, "dur": 4.093, + "args": { + "External id": 978209,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2379450, + "ts": 6345937259963.888, "dur": 7.805, + "args": { + "External id": 978210,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937259967.513, "dur": 3.942, + "args": { + "External id": 978211,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2379450, + "ts": 6345937259973.688, "dur": 3.942, + "args": { + "External id": 978212,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937259974.752, "dur": 2.677, + "args": { + "External id": 978213,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345937260032.493, "dur": 330.019, + "args": { + "External id": 978214,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937260167.000, "dur": 6.400, + "args": { + "External id": 978215,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937260178.282, "dur": 6.274, + "args": { + "External id": 978216,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937260187.952, "dur": 1.957, + "args": { + "External id": 978217,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937260190.838, "dur": 1.791, + "args": { + "External id": 978218,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937260243.642, "dur": 3.332, + "args": { + "External id": 978219,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937260244.819, "dur": 2.045, + "args": { + "External id": 978220,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345937260248.752, "dur": 37.289, + "args": { + "External id": 978221,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937260259.501, "dur": 1.251, + "args": { + "External id": 978222,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937260287.205, "dur": 1.216, + "args": { + "External id": 978223,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937260287.817, "dur": 0.485, + "args": { + "External id": 978224,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345937260289.260, "dur": 20.380, + "args": { + "External id": 978225,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937260290.936, "dur": 5.769, + "args": { + "External id": 978226,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338710, "tid": 2379450, + "ts": 6345937260379.174, "dur": 4.675, + "args": { + "External id": 978227,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338710, "tid": 2379450, + "ts": 6345937260387.109, "dur": 0.710, + "args": { + "External id": 978228,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338710, "tid": 2379450, + "ts": 6345937260390.121, "dur": 0.727, + "args": { + "External id": 978229,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937260429.440, "dur": 271.308, + "args": { + "External id": 978230,"Record function id": 0, "Sequence number": 10552382, "Fwd thread id": 1, "Ev Idx": 821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937260431.436, "dur": 261.376, + "args": { + "External id": 978231,"Sequence number": 10552382, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 822 + } + }, + { + "ph": "f", "id": 84, "pid": 2338710, "tid": 2379450, "ts": 6345937260431.436, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338710, "tid": 2379450, + "ts": 6345937260455.507, "dur": 54.227, + "args": { + "External id": 978232,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937260461.755, "dur": 3.952, + "args": { + "External id": 978233,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937260467.472, "dur": 41.560, + "args": { + "External id": 978234,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], [8, 4096, 8, 128], []], "Ev Idx": 825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2379450, + "ts": 6345937260521.626, "dur": 4.919, + "args": { + "External id": 978235,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937260523.412, "dur": 2.716, + "args": { + "External id": 978236,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937260709.370, "dur": 188.547, + "args": { + "External id": 978237,"Record function id": 0, "Sequence number": 10552381, "Fwd thread id": 1, "Ev Idx": 828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937260711.146, "dur": 178.421, + "args": { + "External id": 978238,"Sequence number": 10552381, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 829 + } + }, + { + "ph": "f", "id": 85, "pid": 2338710, "tid": 2379450, "ts": 6345937260711.146, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338710, "tid": 2379450, + "ts": 6345937260723.462, "dur": 52.232, + "args": { + "External id": 978239,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937260728.966, "dur": 3.810, + "args": { + "External id": 978240,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937260733.997, "dur": 41.065, + "args": { + "External id": 978241,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], []], "Ev Idx": 832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2379450, + "ts": 6345937260783.919, "dur": 4.082, + "args": { + "External id": 978242,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937260785.353, "dur": 2.335, + "args": { + "External id": 978243,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937260905.328, "dur": 18.102, + "args": { + "External id": 978244,"Record function id": 0, "Sequence number": 10552380, "Fwd thread id": 1, "Ev Idx": 835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937260906.894, "dur": 13.862, + "args": { + "External id": 978245,"Sequence number": 10552380, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 836 + } + }, + { + "ph": "f", "id": 86, "pid": 2338710, "tid": 2379450, "ts": 6345937260906.894, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937260909.428, "dur": 10.973, + "args": { + "External id": 978246,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937260913.293, "dur": 6.820, + "args": { + "External id": 978247,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937260927.672, "dur": 7.010, + "args": { + "External id": 978248,"Record function id": 0, "Sequence number": 10552379, "Fwd thread id": 1, "Ev Idx": 839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937260928.702, "dur": 3.541, + "args": { + "External id": 978249,"Sequence number": 10552379, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 840 + } + }, + { + "ph": "f", "id": 87, "pid": 2338710, "tid": 2379450, "ts": 6345937260928.702, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937260929.736, "dur": 2.344, + "args": { + "External id": 978250,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937260930.642, "dur": 1.256, + "args": { + "External id": 978251,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937260941.383, "dur": 11.222, + "args": { + "External id": 978252,"Record function id": 0, "Sequence number": 10552378, "Fwd thread id": 1, "Ev Idx": 843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937260944.716, "dur": 5.616, + "args": { + "External id": 978253,"Sequence number": 10552378, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 844 + } + }, + { + "ph": "f", "id": 88, "pid": 2338710, "tid": 2379450, "ts": 6345937260944.716, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937260945.762, "dur": 4.404, + "args": { + "External id": 978254,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937260949.112, "dur": 0.896, + "args": { + "External id": 978255,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937260956.692, "dur": 6.626, + "args": { + "External id": 978256,"Record function id": 0, "Sequence number": 10552377, "Fwd thread id": 1, "Ev Idx": 847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937260957.547, "dur": 2.865, + "args": { + "External id": 978257,"Sequence number": 10552377, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 848 + } + }, + { + "ph": "f", "id": 89, "pid": 2338710, "tid": 2379450, "ts": 6345937260957.547, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937260958.516, "dur": 1.721, + "args": { + "External id": 978258,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937260959.150, "dur": 0.992, + "args": { + "External id": 978259,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937260967.349, "dur": 271.382, + "args": { + "External id": 978260,"Record function id": 0, "Sequence number": 10552376, "Fwd thread id": 1, "Ev Idx": 851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937260968.106, "dur": 258.761, + "args": { + "External id": 978261,"Sequence number": 10552376, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 852 + } + }, + { + "ph": "f", "id": 90, "pid": 2338710, "tid": 2379450, "ts": 6345937260968.106, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937260975.251, "dur": 6.741, + "args": { + "External id": 978262,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937260977.365, "dur": 3.854, + "args": { + "External id": 978263,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937260979.387, "dur": 1.472, + "args": { + "External id": 978264,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937260983.657, "dur": 156.121, + "args": { + "External id": 978265,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937261143.265, "dur": 10.871, + "args": { + "External id": 978266,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937261145.059, "dur": 7.884, + "args": { + "External id": 978267,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937261150.703, "dur": 1.968, + "args": { + "External id": 978268,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937261156.639, "dur": 3.393, + "args": { + "External id": 978269,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937261157.950, "dur": 1.534, + "args": { + "External id": 978270,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937261158.903, "dur": 0.492, + "args": { + "External id": 978271,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937261161.357, "dur": 64.425, + "args": { + "External id": 978272,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937261247.765, "dur": 13.605, + "args": { + "External id": 978273,"Record function id": 0, "Sequence number": 10552375, "Fwd thread id": 1, "Ev Idx": 864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937261249.157, "dur": 9.885, + "args": { + "External id": 978274,"Sequence number": 10552375, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 865 + } + }, + { + "ph": "f", "id": 91, "pid": 2338710, "tid": 2379450, "ts": 6345937261249.157, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937261251.202, "dur": 7.679, + "args": { + "External id": 978275,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937261256.950, "dur": 1.771, + "args": { + "External id": 978276,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937261265.391, "dur": 7.489, + "args": { + "External id": 978277,"Record function id": 0, "Sequence number": 10552374, "Fwd thread id": 1, "Ev Idx": 868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937261266.397, "dur": 4.479, + "args": { + "External id": 978278,"Sequence number": 10552374, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 869 + } + }, + { + "ph": "f", "id": 92, "pid": 2338710, "tid": 2379450, "ts": 6345937261266.397, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937261267.454, "dur": 3.192, + "args": { + "External id": 978279,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937261268.343, "dur": 1.747, + "args": { + "External id": 978280,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937261269.458, "dur": 0.435, + "args": { + "External id": 978281,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937261279.650, "dur": 13.744, + "args": { + "External id": 978282,"Record function id": 0, "Ev Idx": 873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937261281.256, "dur": 11.043, + "args": { + "External id": 978283,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937261284.483, "dur": 7.349, + "args": { + "External id": 978284,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937261288.630, "dur": 3.087, + "args": { + "External id": 978285,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937261297.220, "dur": 9.383, + "args": { + "External id": 978286,"Record function id": 0, "Sequence number": 10552373, "Fwd thread id": 1, "Ev Idx": 877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937261298.128, "dur": 5.589, + "args": { + "External id": 978287,"Sequence number": 10552373, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 878 + } + }, + { + "ph": "f", "id": 93, "pid": 2338710, "tid": 2379450, "ts": 6345937261298.128, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937261299.240, "dur": 4.298, + "args": { + "External id": 978288,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937261302.244, "dur": 1.173, + "args": { + "External id": 978289,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937261310.289, "dur": 121.302, + "args": { + "External id": 978290,"Record function id": 0, "Sequence number": 10552372, "Fwd thread id": 1, "Ev Idx": 881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937261311.025, "dur": 113.183, + "args": { + "External id": 978291,"Sequence number": 10552372, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 882 + } + }, + { + "ph": "f", "id": 94, "pid": 2338710, "tid": 2379450, "ts": 6345937261311.025, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937261313.732, "dur": 5.671, + "args": { + "External id": 978292,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937261314.299, "dur": 4.590, + "args": { + "External id": 978293,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937261318.178, "dur": 0.562, + "args": { + "External id": 978294,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937261320.383, "dur": 33.508, + "args": { + "External id": 978295,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937261355.463, "dur": 11.387, + "args": { + "External id": 978296,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937261362.140, "dur": 4.051, + "args": { + "External id": 978297,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937261363.315, "dur": 2.676, + "args": { + "External id": 978298,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937261368.397, "dur": 9.990, + "args": { + "External id": 978299,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937261371.773, "dur": 6.089, + "args": { + "External id": 978300,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937261374.992, "dur": 2.779, + "args": { + "External id": 978301,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937261379.264, "dur": 43.878, + "args": { + "External id": 978302,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937261436.649, "dur": 40.121, + "args": { + "External id": 978303,"Record function id": 0, "Sequence number": 10552371, "Fwd thread id": 1, "Ev Idx": 894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937261437.670, "dur": 4.058, + "args": { + "External id": 978304,"Sequence number": 10552371, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 895 + } + }, + { + "ph": "f", "id": 95, "pid": 2338710, "tid": 2379450, "ts": 6345937261437.670, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937261439.247, "dur": 2.323, + "args": { + "External id": 978305,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937261440.052, "dur": 1.352, + "args": { + "External id": 978306,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338710, "tid": 2379450, + "ts": 6345937261448.211, "dur": 25.505, + "args": { + "External id": 978307,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937261483.928, "dur": 19.676, + "args": { + "External id": 978308,"Record function id": 0, "Sequence number": 10552370, "Fwd thread id": 1, "Ev Idx": 899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937261484.952, "dur": 16.051, + "args": { + "External id": 978309,"Sequence number": 10552370, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 900 + } + }, + { + "ph": "f", "id": 96, "pid": 2338710, "tid": 2379450, "ts": 6345937261484.952, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937261488.788, "dur": 11.959, + "args": { + "External id": 978310,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937261489.771, "dur": 10.357, + "args": { + "External id": 978311,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937261499.318, "dur": 0.639, + "args": { + "External id": 978312,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937261508.431, "dur": 5.933, + "args": { + "External id": 978313,"Record function id": 0, "Ev Idx": 904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937261509.957, "dur": 3.774, + "args": { + "External id": 978314,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937261511.327, "dur": 2.042, + "args": { + "External id": 978315,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937261511.977, "dur": 1.238, + "args": { + "External id": 978316,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937261521.053, "dur": 8.789, + "args": { + "External id": 978317,"Record function id": 0, "Sequence number": 10552369, "Fwd thread id": 1, "Ev Idx": 908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937261522.219, "dur": 5.045, + "args": { + "External id": 978318,"Sequence number": 10552369, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 909 + } + }, + { + "ph": "f", "id": 97, "pid": 2338710, "tid": 2379450, "ts": 6345937261522.219, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937261523.070, "dur": 4.007, + "args": { + "External id": 978319,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937261525.794, "dur": 1.116, + "args": { + "External id": 978320,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937261533.437, "dur": 112.440, + "args": { + "External id": 978321,"Record function id": 0, "Sequence number": 10552368, "Fwd thread id": 1, "Ev Idx": 912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937261534.234, "dur": 103.225, + "args": { + "External id": 978322,"Sequence number": 10552368, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 913 + } + }, + { + "ph": "f", "id": 98, "pid": 2338710, "tid": 2379450, "ts": 6345937261534.234, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937261536.857, "dur": 2.354, + "args": { + "External id": 978323,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937261537.360, "dur": 1.363, + "args": { + "External id": 978324,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937261538.085, "dur": 0.492, + "args": { + "External id": 978325,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937261540.104, "dur": 43.248, + "args": { + "External id": 978326,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937261586.844, "dur": 6.462, + "args": { + "External id": 978327,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937261587.599, "dur": 5.120, + "args": { + "External id": 978328,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937261591.458, "dur": 1.113, + "args": { + "External id": 978329,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937261594.554, "dur": 3.179, + "args": { + "External id": 978330,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937261595.645, "dur": 1.583, + "args": { + "External id": 978331,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937261596.591, "dur": 0.561, + "args": { + "External id": 978332,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937261600.577, "dur": 36.068, + "args": { + "External id": 978333,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937261650.928, "dur": 28.911, + "args": { + "External id": 978334,"Record function id": 0, "Sequence number": 10552367, "Fwd thread id": 1, "Ev Idx": 925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937261651.876, "dur": 5.984, + "args": { + "External id": 978335,"Sequence number": 10552367, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 926 + } + }, + { + "ph": "f", "id": 99, "pid": 2338710, "tid": 2379450, "ts": 6345937261651.876, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937261653.345, "dur": 4.333, + "args": { + "External id": 978336,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937261656.462, "dur": 1.085, + "args": { + "External id": 978337,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345937261660.518, "dur": 16.720, + "args": { + "External id": 978338,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937261684.352, "dur": 10.099, + "args": { + "External id": 978339,"Record function id": 0, "Sequence number": 10552366, "Fwd thread id": 1, "Ev Idx": 930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937261685.381, "dur": 7.295, + "args": { + "External id": 978340,"Sequence number": 10552366, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 931 + } + }, + { + "ph": "f", "id": 100, "pid": 2338710, "tid": 2379450, "ts": 6345937261685.381, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937261686.418, "dur": 6.046, + "args": { + "External id": 978341,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937261687.312, "dur": 4.597, + "args": { + "External id": 978342,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937261691.321, "dur": 0.426, + "args": { + "External id": 978343,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937261699.205, "dur": 6.047, + "args": { + "External id": 978344,"Record function id": 0, "Ev Idx": 935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937261700.818, "dur": 3.887, + "args": { + "External id": 978345,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937261702.256, "dur": 1.899, + "args": { + "External id": 978346,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937261702.858, "dur": 1.195, + "args": { + "External id": 978347,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937261709.880, "dur": 494.637, + "args": { + "External id": 978348,"Record function id": 0, "Sequence number": 10552365, "Fwd thread id": 1, "Ev Idx": 939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937261711.230, "dur": 458.544, + "args": { + "External id": 978349,"Sequence number": 10552365, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 940 + } + }, + { + "ph": "f", "id": 101, "pid": 2338710, "tid": 2379450, "ts": 6345937261711.230, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937261757.092, "dur": 1.985, + "args": { + "External id": 978350,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937261757.762, "dur": 1.105, + "args": { + "External id": 978351,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937261776.338, "dur": 6.544, + "args": { + "External id": 978352,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937261796.239, "dur": 2.414, + "args": { + "External id": 978353,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937261974.235, "dur": 2.287, + "args": { + "External id": 978354,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345937261981.264, "dur": 66.033, + "args": { + "External id": 978355,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937261996.078, "dur": 1.162, + "args": { + "External id": 978356,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345937262093.595, "dur": 41.022, + "args": { + "External id": 978357,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345937262096.817, "dur": 37.555, + "args": { + "External id": 978358,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937262102.079, "dur": 7.867, + "args": { + "External id": 978359,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937262112.050, "dur": 21.774, + "args": { + "External id": 978360,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2379450, + "ts": 6345937262144.369, "dur": 3.567, + "args": { + "External id": 978361,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937262145.774, "dur": 2.031, + "args": { + "External id": 978362,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937262159.306, "dur": 2.085, + "args": { + "External id": 978363,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937262160.216, "dur": 1.071, + "args": { + "External id": 978364,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345937262181.106, "dur": 18.635, + "args": { + "External id": 978365,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937262220.028, "dur": 12.426, + "args": { + "External id": 978366,"Record function id": 0, "Ev Idx": 957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937262221.995, "dur": 9.423, + "args": { + "External id": 978367,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937262224.459, "dur": 5.511, + "args": { + "External id": 978368,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937262227.818, "dur": 2.008, + "args": { + "External id": 978369,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937262237.035, "dur": 6.958, + "args": { + "External id": 978370,"Record function id": 0, "Sequence number": 10552364, "Fwd thread id": 1, "Ev Idx": 961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937262238.904, "dur": 1.449, + "args": { + "External id": 978371,"Sequence number": 10552364, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 962 + } + }, + { + "ph": "f", "id": 102, "pid": 2338710, "tid": 2379450, "ts": 6345937262238.904, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937262248.294, "dur": 479.360, + "args": { + "External id": 978372,"Record function id": 0, "Sequence number": 10552363, "Fwd thread id": 1, "Ev Idx": 963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937262249.595, "dur": 466.824, + "args": { + "External id": 978373,"Sequence number": 10552363, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 964 + } + }, + { + "ph": "f", "id": 103, "pid": 2338710, "tid": 2379450, "ts": 6345937262249.595, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937262283.373, "dur": 11.565, + "args": { + "External id": 978374,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338710, "tid": 2379450, + "ts": 6345937262290.840, "dur": 3.760, + "args": { + "External id": 978375,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]", "[16384, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[67108864, 16384, 1], [], []], "Input Dims": [[8, 4096, 4096], [], []], "Ev Idx": 966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937262298.386, "dur": 8.938, + "args": { + "External id": 978376,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937262303.673, "dur": 2.885, + "args": { + "External id": 978377,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937262305.484, "dur": 0.884, + "args": { + "External id": 978378,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2379450, + "ts": 6345937262311.365, "dur": 110.652, + "args": { + "External id": 978379,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16384, 1], [1, 14336], []], "Input Dims": [[32768, 4096], [14336, 4096], []], "Ev Idx": 970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937262312.551, "dur": 5.602, + "args": { + "External id": 978380,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 14336]], "Input Dims": [[14336, 4096]], "Ev Idx": 971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937262313.370, "dur": 4.255, + "args": { + "External id": 978381,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 14336], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937262314.239, "dur": 3.291, + "args": { + "External id": 978382,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[14336, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 14336], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2379450, + "ts": 6345937262322.743, "dur": 98.836, + "args": { + "External id": 978383,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937262324.900, "dur": 95.640, + "args": { + "External id": 978384,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2379450, + "ts": 6345937262426.527, "dur": 5.513, + "args": { + "External id": 978385,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [58720256, 14336, 1]], "Input Dims": [[32768, 14336], [8, 4096, 14336]], "Ev Idx": 976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937262429.866, "dur": 1.998, + "args": { + "External id": 978386,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937262470.363, "dur": 6.965, + "args": { + "External id": 978387,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937262478.898, "dur": 2.323, + "args": { + "External id": 978388,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937262481.936, "dur": 2.067, + "args": { + "External id": 978389,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937262523.422, "dur": 2.368, + "args": { + "External id": 978390,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937262524.186, "dur": 1.436, + "args": { + "External id": 978391,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 2338710, "tid": 2379450, + "ts": 6345937262550.370, "dur": 146.044, + "args": { + "External id": 978392,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[16384, 1], [14336, 1]], []], "Input Dims": [[], [[32768, 4096], [32768, 14336]], []], "Ev Idx": 983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2379450, + "ts": 6345937262556.694, "dur": 5.522, + "args": { + "External id": 978393,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937262560.240, "dur": 0.934, + "args": { + "External id": 978394,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096, 1]", "[16384, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338710, "tid": 2379450, + "ts": 6345937262565.726, "dur": 9.615, + "args": { + "External id": 978395,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16384, 1, 1], []], "Input Dims": [[32768, 4096, 1], []], "Ev Idx": 986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937262571.182, "dur": 3.099, + "args": { + "External id": 978396,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 32768]", "[1, 1, 16384]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1, 1], [], [], []], "Input Dims": [[32768, 4096, 1], [], [], []], "Ev Idx": 987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2379450, + "ts": 6345937262576.963, "dur": 3.987, + "args": { + "External id": 978397,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937262580.132, "dur": 0.445, + "args": { + "External id": 978398,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338710, "tid": 2379450, + "ts": 6345937262581.694, "dur": 4.840, + "args": { + "External id": 978399,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937262585.496, "dur": 0.451, + "args": { + "External id": 978400,"Record function id": 0, "Concrete Inputs": ["", "[1, 14336, 32768]", "[1, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1, 1], [], [], []], "Input Dims": [[32768, 14336, 1], [], [], []], "Ev Idx": 991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338710, "tid": 2379450, + "ts": 6345937262593.027, "dur": 4.840, + "args": { + "External id": 978401,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 16384], []], "Input Dims": [[4096, 1, 32768], []], "Ev Idx": 992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937262596.787, "dur": 0.726, + "args": { + "External id": 978402,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768, 1]", "[1, 16384, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 16384], [], [], []], "Input Dims": [[4096, 1, 32768], [], [], []], "Ev Idx": 993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937262598.858, "dur": 8.317, + "args": { + "External id": 978403,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 16384, 1], []], "Input Dims": [[4096, 32768, 1], []], "Ev Idx": 994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338710, "tid": 2379450, + "ts": 6345937262604.395, "dur": 2.564, + "args": { + "External id": 978404,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]", "[4096, 1, 16384]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 16384, 1], [], []], "Input Dims": [[4096, 32768, 1], [], []], "Ev Idx": 995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338710, "tid": 2379450, + "ts": 6345937262607.874, "dur": 2.358, + "args": { + "External id": 978405,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 14336], []], "Input Dims": [[1, 14336, 32768], []], "Ev Idx": 996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937262609.194, "dur": 0.684, + "args": { + "External id": 978406,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 14336], [], [], []], "Input Dims": [[1, 14336, 32768], [], [], []], "Ev Idx": 997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937262613.305, "dur": 4.560, + "args": { + "External id": 978407,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937262614.095, "dur": 3.651, + "args": { + "External id": 978408,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345937262619.287, "dur": 58.491, + "args": { + "External id": 978409,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1, 16384], [469762048, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336]], "Ev Idx": 1000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937262682.167, "dur": 3.866, + "args": { + "External id": 978410,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[1, 4096, 14336], []], "Ev Idx": 1001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338710, "tid": 2379450, + "ts": 6345937262686.844, "dur": 4.798, + "args": { + "External id": 978411,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 14336, 1], []], "Input Dims": [[4096, 1, 14336], []], "Ev Idx": 1002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937262690.221, "dur": 0.885, + "args": { + "External id": 978412,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336, 1]", "[14336, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 14336, 1], [], [], []], "Input Dims": [[4096, 1, 14336], [], [], []], "Ev Idx": 1003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937262694.026, "dur": 0.972, + "args": { + "External id": 978413,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 14336], []], "Input Dims": [[4096, 14336, 1], []], "Ev Idx": 1004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937262738.267, "dur": 9.276, + "args": { + "External id": 978414,"Record function id": 0, "Ev Idx": 1005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937262740.336, "dur": 6.127, + "args": { + "External id": 978415,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937262742.295, "dur": 3.324, + "args": { + "External id": 978416,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937262743.420, "dur": 2.064, + "args": { + "External id": 978417,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937262752.030, "dur": 12.351, + "args": { + "External id": 978418,"Record function id": 0, "Sequence number": 10552362, "Fwd thread id": 1, "Ev Idx": 1009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937262752.904, "dur": 9.085, + "args": { + "External id": 978419,"Sequence number": 10552362, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 1010 + } + }, + { + "ph": "f", "id": 104, "pid": 2338710, "tid": 2379450, "ts": 6345937262752.904, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937262754.948, "dur": 6.787, + "args": { + "External id": 978420,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937262760.379, "dur": 1.203, + "args": { + "External id": 978421,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937262768.651, "dur": 150.567, + "args": { + "External id": 978422,"Record function id": 0, "Sequence number": 10552361, "Fwd thread id": 1, "Ev Idx": 1013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937262769.582, "dur": 142.808, + "args": { + "External id": 978423,"Sequence number": 10552361, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 1014 + } + }, + { + "ph": "f", "id": 105, "pid": 2338710, "tid": 2379450, "ts": 6345937262769.582, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937262773.603, "dur": 3.658, + "args": { + "External id": 978424,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 1015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937262774.466, "dur": 2.162, + "args": { + "External id": 978425,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 1016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937262775.816, "dur": 0.640, + "args": { + "External id": 978426,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 1017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937262780.877, "dur": 44.592, + "args": { + "External id": 978427,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 1018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937262827.255, "dur": 3.973, + "args": { + "External id": 978428,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937262828.104, "dur": 2.212, + "args": { + "External id": 978429,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 1020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937262829.221, "dur": 0.892, + "args": { + "External id": 978430,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 1021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937262832.733, "dur": 33.247, + "args": { + "External id": 978431,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937262833.504, "dur": 31.891, + "args": { + "External id": 978432,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 1023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937262864.859, "dur": 0.454, + "args": { + "External id": 978433,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 1024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937262866.630, "dur": 44.692, + "args": { + "External id": 978434,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 1025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937262927.350, "dur": 6.836, + "args": { + "External id": 978435,"Record function id": 0, "Sequence number": 10552360, "Fwd thread id": 1, "Ev Idx": 1026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937262928.393, "dur": 4.175, + "args": { + "External id": 978436,"Sequence number": 10552360, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1027 + } + }, + { + "ph": "f", "id": 106, "pid": 2338710, "tid": 2379450, "ts": 6345937262928.393, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937262930.152, "dur": 2.209, + "args": { + "External id": 978437,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937262930.940, "dur": 1.296, + "args": { + "External id": 978438,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937262938.134, "dur": 9.901, + "args": { + "External id": 978439,"Record function id": 0, "Sequence number": 10552359, "Fwd thread id": 1, "Ev Idx": 1030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937262939.136, "dur": 6.293, + "args": { + "External id": 978440,"Sequence number": 10552359, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1031 + } + }, + { + "ph": "f", "id": 107, "pid": 2338710, "tid": 2379450, "ts": 6345937262939.136, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937262939.895, "dur": 5.273, + "args": { + "External id": 978441,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937262940.840, "dur": 3.813, + "args": { + "External id": 978442,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 1033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937262944.096, "dur": 0.425, + "args": { + "External id": 978443,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 1034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937262955.206, "dur": 5.189, + "args": { + "External id": 978444,"Record function id": 0, "Ev Idx": 1035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937262956.612, "dur": 3.152, + "args": { + "External id": 978445,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937262957.662, "dur": 1.813, + "args": { + "External id": 978446,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937262958.260, "dur": 1.094, + "args": { + "External id": 978447,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937262963.816, "dur": 5.481, + "args": { + "External id": 978448,"Record function id": 0, "Sequence number": 10552358, "Fwd thread id": 1, "Ev Idx": 1039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937262964.800, "dur": 2.737, + "args": { + "External id": 978449,"Sequence number": 10552358, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 1040 + } + }, + { + "ph": "f", "id": 108, "pid": 2338710, "tid": 2379450, "ts": 6345937262964.800, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937262965.855, "dur": 1.512, + "args": { + "External id": 978450,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937262966.367, "dur": 0.841, + "args": { + "External id": 978451,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937262972.898, "dur": 193.551, + "args": { + "External id": 978452,"Record function id": 0, "Sequence number": 10552357, "Fwd thread id": 1, "Ev Idx": 1043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937262973.603, "dur": 182.699, + "args": { + "External id": 978453,"Sequence number": 10552357, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 1044 + } + }, + { + "ph": "f", "id": 109, "pid": 2338710, "tid": 2379450, "ts": 6345937262973.603, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937262978.366, "dur": 2.522, + "args": { + "External id": 978454,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 1045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937262978.922, "dur": 1.475, + "args": { + "External id": 978455,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 1046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937262979.688, "dur": 0.586, + "args": { + "External id": 978456,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 1047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937262981.540, "dur": 63.282, + "args": { + "External id": 978457,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 1048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937263047.468, "dur": 55.471, + "args": { + "External id": 978458,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937263048.420, "dur": 53.233, + "args": { + "External id": 978459,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 1050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937263098.690, "dur": 1.296, + "args": { + "External id": 978460,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 1051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937263104.573, "dur": 5.893, + "args": { + "External id": 978461,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937263105.882, "dur": 3.852, + "args": { + "External id": 978462,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 1053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937263107.042, "dur": 2.580, + "args": { + "External id": 978463,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 1054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937263111.299, "dur": 44.050, + "args": { + "External id": 978464,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 1055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937263174.813, "dur": 40.151, + "args": { + "External id": 978465,"Record function id": 0, "Sequence number": 10552356, "Fwd thread id": 1, "Ev Idx": 1056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937263176.160, "dur": 8.683, + "args": { + "External id": 978466,"Sequence number": 10552356, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1057 + } + }, + { + "ph": "f", "id": 110, "pid": 2338710, "tid": 2379450, "ts": 6345937263176.160, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937263177.849, "dur": 6.836, + "args": { + "External id": 978467,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937263183.083, "dur": 1.453, + "args": { + "External id": 978468,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338710, "tid": 2379450, + "ts": 6345937263188.349, "dur": 23.389, + "args": { + "External id": 978469,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937263219.546, "dur": 8.187, + "args": { + "External id": 978470,"Record function id": 0, "Sequence number": 10552355, "Fwd thread id": 1, "Ev Idx": 1061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937263220.555, "dur": 4.996, + "args": { + "External id": 978471,"Sequence number": 10552355, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1062 + } + }, + { + "ph": "f", "id": 111, "pid": 2338710, "tid": 2379450, "ts": 6345937263220.555, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937263221.685, "dur": 3.609, + "args": { + "External id": 978472,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937263222.776, "dur": 1.919, + "args": { + "External id": 978473,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 1064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937263223.843, "dur": 0.700, + "args": { + "External id": 978474,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 1065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937263232.837, "dur": 9.208, + "args": { + "External id": 978475,"Record function id": 0, "Ev Idx": 1066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937263234.154, "dur": 7.279, + "args": { + "External id": 978476,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937263235.692, "dur": 5.336, + "args": { + "External id": 978477,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937263239.409, "dur": 1.498, + "args": { + "External id": 978478,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937263246.606, "dur": 487.285, + "args": { + "External id": 978479,"Record function id": 0, "Sequence number": 10552354, "Fwd thread id": 1, "Ev Idx": 1070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937263247.976, "dur": 448.087, + "args": { + "External id": 978480,"Sequence number": 10552354, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 1071 + } + }, + { + "ph": "f", "id": 112, "pid": 2338710, "tid": 2379450, "ts": 6345937263247.976, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338710, "tid": 2379450, + "ts": 6345937263277.163, "dur": 42.562, + "args": { + "External id": 978481,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338710, "tid": 2379450, + "ts": 6345937263279.083, "dur": 40.420, + "args": { + "External id": 978482,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2379450, + "ts": 6345937263284.692, "dur": 7.297, + "args": { + "External id": 978483,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 1074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937263287.622, "dur": 3.678, + "args": { + "External id": 978484,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937263294.517, "dur": 24.376, + "args": { + "External id": 978485,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937263333.044, "dur": 2.479, + "args": { + "External id": 978486,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937263333.865, "dur": 1.478, + "args": { + "External id": 978487,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937263339.969, "dur": 5.787, + "args": { + "External id": 978488,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937263342.801, "dur": 2.847, + "args": { + "External id": 978489,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937263359.953, "dur": 3.015, + "args": { + "External id": 978490,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937263378.316, "dur": 2.367, + "args": { + "External id": 978491,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937263567.336, "dur": 3.367, + "args": { + "External id": 978492,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 1083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345937263575.547, "dur": 41.646, + "args": { + "External id": 978493,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 1084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937263590.216, "dur": 1.259, + "args": { + "External id": 978494,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 1085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345937263624.275, "dur": 32.708, + "args": { + "External id": 978495,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 1086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345937263627.838, "dur": 28.898, + "args": { + "External id": 978496,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 1087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937263632.487, "dur": 6.289, + "args": { + "External id": 978497,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937263640.621, "dur": 15.494, + "args": { + "External id": 978498,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 1089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2379450, + "ts": 6345937263661.771, "dur": 2.537, + "args": { + "External id": 978499,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 1090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937263663.169, "dur": 1.011, + "args": { + "External id": 978500,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 1091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937263671.674, "dur": 7.296, + "args": { + "External id": 978501,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937263677.699, "dur": 1.170, + "args": { + "External id": 978502,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937263680.989, "dur": 5.014, + "args": { + "External id": 978503,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937263681.977, "dur": 3.913, + "args": { + "External id": 978504,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345937263710.924, "dur": 21.319, + "args": { + "External id": 978505,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937263745.444, "dur": 8.149, + "args": { + "External id": 978506,"Record function id": 0, "Ev Idx": 1097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937263747.308, "dur": 5.333, + "args": { + "External id": 978507,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937263749.177, "dur": 2.438, + "args": { + "External id": 978508,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937263750.043, "dur": 1.474, + "args": { + "External id": 978509,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937263757.807, "dur": 12.512, + "args": { + "External id": 978510,"Record function id": 0, "Sequence number": 10552353, "Fwd thread id": 1, "Ev Idx": 1101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937263758.662, "dur": 8.901, + "args": { + "External id": 978511,"Sequence number": 10552353, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1102 + } + }, + { + "ph": "f", "id": 113, "pid": 2338710, "tid": 2379450, "ts": 6345937263758.662, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937263760.388, "dur": 6.982, + "args": { + "External id": 978512,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937263765.708, "dur": 1.468, + "args": { + "External id": 978513,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937263774.817, "dur": 167.474, + "args": { + "External id": 978514,"Record function id": 0, "Sequence number": 10552352, "Fwd thread id": 1, "Ev Idx": 1105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937263775.754, "dur": 158.951, + "args": { + "External id": 978515,"Sequence number": 10552352, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1106 + } + }, + { + "ph": "f", "id": 114, "pid": 2338710, "tid": 2379450, "ts": 6345937263775.754, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937263778.903, "dur": 5.160, + "args": { + "External id": 978516,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937263780.515, "dur": 2.939, + "args": { + "External id": 978517,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 1108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937263782.145, "dur": 1.113, + "args": { + "External id": 978518,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 1109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937263787.645, "dur": 77.480, + "args": { + "External id": 978519,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 1110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937263866.764, "dur": 3.733, + "args": { + "External id": 978520,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937263867.548, "dur": 2.311, + "args": { + "External id": 978521,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937263868.844, "dur": 0.861, + "args": { + "External id": 978522,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937263872.280, "dur": 16.703, + "args": { + "External id": 978523,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937263882.898, "dur": 5.554, + "args": { + "External id": 978524,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937263887.802, "dur": 0.515, + "args": { + "External id": 978525,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937263889.615, "dur": 44.013, + "args": { + "External id": 978526,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 1117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937263950.305, "dur": 9.085, + "args": { + "External id": 978527,"Record function id": 0, "Sequence number": 10552351, "Fwd thread id": 1, "Ev Idx": 1118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937263951.447, "dur": 5.534, + "args": { + "External id": 978528,"Sequence number": 10552351, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1119 + } + }, + { + "ph": "f", "id": 115, "pid": 2338710, "tid": 2379450, "ts": 6345937263951.447, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937263952.712, "dur": 4.081, + "args": { + "External id": 978529,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937263953.477, "dur": 3.201, + "args": { + "External id": 978530,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937263963.173, "dur": 11.330, + "args": { + "External id": 978531,"Record function id": 0, "Sequence number": 10552350, "Fwd thread id": 1, "Ev Idx": 1122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937263963.914, "dur": 8.683, + "args": { + "External id": 978532,"Sequence number": 10552350, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1123 + } + }, + { + "ph": "f", "id": 116, "pid": 2338710, "tid": 2379450, "ts": 6345937263963.914, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937263964.538, "dur": 7.793, + "args": { + "External id": 978533,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937263965.257, "dur": 6.516, + "args": { + "External id": 978534,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937263971.164, "dur": 0.463, + "args": { + "External id": 978535,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937263978.885, "dur": 5.486, + "args": { + "External id": 978536,"Record function id": 0, "Ev Idx": 1127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937263980.222, "dur": 3.575, + "args": { + "External id": 978537,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937263981.351, "dur": 2.120, + "args": { + "External id": 978538,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937263982.264, "dur": 1.100, + "args": { + "External id": 978539,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937263987.817, "dur": 6.072, + "args": { + "External id": 978540,"Record function id": 0, "Sequence number": 10552349, "Fwd thread id": 1, "Ev Idx": 1131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937263988.515, "dur": 3.777, + "args": { + "External id": 978541,"Sequence number": 10552349, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1132 + } + }, + { + "ph": "f", "id": 117, "pid": 2338710, "tid": 2379450, "ts": 6345937263988.515, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937263989.611, "dur": 2.510, + "args": { + "External id": 978542,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937263990.591, "dur": 1.387, + "args": { + "External id": 978543,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937263998.302, "dur": 539.478, + "args": { + "External id": 978544,"Record function id": 0, "Sequence number": 10552348, "Fwd thread id": 1, "Ev Idx": 1135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937263999.436, "dur": 515.887, + "args": { + "External id": 978545,"Sequence number": 10552348, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 1136 + } + }, + { + "ph": "f", "id": 118, "pid": 2338710, "tid": 2379450, "ts": 6345937263999.436, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2379450, + "ts": 6345937264038.342, "dur": 53.959, + "args": { + "External id": 978546,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 1137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937264047.304, "dur": 43.683, + "args": { + "External id": 978547,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2379450, + "ts": 6345937264097.838, "dur": 5.361, + "args": { + "External id": 978548,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 1139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937264099.245, "dur": 3.681, + "args": { + "External id": 978549,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2379450, + "ts": 6345937264108.045, "dur": 7.388, + "args": { + "External id": 978550,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 1141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937264111.065, "dur": 4.119, + "args": { + "External id": 978551,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345937264146.934, "dur": 333.456, + "args": { + "External id": 978552,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937264237.009, "dur": 7.718, + "args": { + "External id": 978553,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937264246.526, "dur": 5.234, + "args": { + "External id": 978554,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937264254.962, "dur": 2.608, + "args": { + "External id": 978555,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937264258.538, "dur": 4.500, + "args": { + "External id": 978556,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937264360.761, "dur": 4.111, + "args": { + "External id": 978557,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937264361.642, "dur": 3.014, + "args": { + "External id": 978558,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345937264369.072, "dur": 34.489, + "args": { + "External id": 978559,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 1150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937264374.506, "dur": 1.615, + "args": { + "External id": 978560,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 1151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937264407.326, "dur": 1.488, + "args": { + "External id": 978561,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937264408.119, "dur": 0.595, + "args": { + "External id": 978562,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345937264412.186, "dur": 18.770, + "args": { + "External id": 978563,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 1154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937264415.132, "dur": 0.461, + "args": { + "External id": 978564,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 1155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338710, "tid": 2379450, + "ts": 6345937264496.065, "dur": 4.748, + "args": { + "External id": 978565,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 1156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338710, "tid": 2379450, + "ts": 6345937264508.280, "dur": 0.702, + "args": { + "External id": 978566,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338710, "tid": 2379450, + "ts": 6345937264511.283, "dur": 0.698, + "args": { + "External id": 978567,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937264553.807, "dur": 276.389, + "args": { + "External id": 978568,"Record function id": 0, "Sequence number": 10552347, "Fwd thread id": 1, "Ev Idx": 1159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937264555.900, "dur": 266.113, + "args": { + "External id": 978569,"Sequence number": 10552347, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1160 + } + }, + { + "ph": "f", "id": 119, "pid": 2338710, "tid": 2379450, "ts": 6345937264555.900, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338710, "tid": 2379450, + "ts": 6345937264578.274, "dur": 55.159, + "args": { + "External id": 978570,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937264581.791, "dur": 3.643, + "args": { + "External id": 978571,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937264587.211, "dur": 45.501, + "args": { + "External id": 978572,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], [8, 4096, 8, 128], []], "Ev Idx": 1163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2379450, + "ts": 6345937264644.489, "dur": 7.065, + "args": { + "External id": 978573,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 1164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937264648.784, "dur": 2.272, + "args": { + "External id": 978574,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937264838.555, "dur": 260.559, + "args": { + "External id": 978575,"Record function id": 0, "Sequence number": 10552346, "Fwd thread id": 1, "Ev Idx": 1166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937264840.770, "dur": 209.393, + "args": { + "External id": 978576,"Sequence number": 10552346, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 1167 + } + }, + { + "ph": "f", "id": 120, "pid": 2338710, "tid": 2379450, "ts": 6345937264840.770, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338710, "tid": 2379450, + "ts": 6345937264853.881, "dur": 51.307, + "args": { + "External id": 978577,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937264856.576, "dur": 6.084, + "args": { + "External id": 978578,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937264864.076, "dur": 40.476, + "args": { + "External id": 978579,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], []], "Ev Idx": 1170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2379450, + "ts": 6345937264913.826, "dur": 8.557, + "args": { + "External id": 978580,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 1171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937264918.508, "dur": 3.580, + "args": { + "External id": 978581,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937265112.605, "dur": 17.359, + "args": { + "External id": 978582,"Record function id": 0, "Sequence number": 10552345, "Fwd thread id": 1, "Ev Idx": 1173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937265114.914, "dur": 11.631, + "args": { + "External id": 978583,"Sequence number": 10552345, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1174 + } + }, + { + "ph": "f", "id": 121, "pid": 2338710, "tid": 2379450, "ts": 6345937265114.914, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937265118.592, "dur": 7.568, + "args": { + "External id": 978584,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937265119.828, "dur": 6.032, + "args": { + "External id": 978585,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937265133.991, "dur": 11.870, + "args": { + "External id": 978586,"Record function id": 0, "Sequence number": 10552344, "Fwd thread id": 1, "Ev Idx": 1177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937265137.516, "dur": 6.494, + "args": { + "External id": 978587,"Sequence number": 10552344, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1178 + } + }, + { + "ph": "f", "id": 122, "pid": 2338710, "tid": 2379450, "ts": 6345937265137.516, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937265138.722, "dur": 5.112, + "args": { + "External id": 978588,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937265142.094, "dur": 1.582, + "args": { + "External id": 978589,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937265149.410, "dur": 5.642, + "args": { + "External id": 978590,"Record function id": 0, "Sequence number": 10552343, "Fwd thread id": 1, "Ev Idx": 1181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937265150.145, "dur": 2.848, + "args": { + "External id": 978591,"Sequence number": 10552343, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 1182 + } + }, + { + "ph": "f", "id": 123, "pid": 2338710, "tid": 2379450, "ts": 6345937265150.145, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937265150.969, "dur": 1.853, + "args": { + "External id": 978592,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937265151.802, "dur": 0.925, + "args": { + "External id": 978593,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937265158.882, "dur": 10.502, + "args": { + "External id": 978594,"Record function id": 0, "Sequence number": 10552342, "Fwd thread id": 1, "Ev Idx": 1185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937265159.766, "dur": 7.342, + "args": { + "External id": 978595,"Sequence number": 10552342, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 1186 + } + }, + { + "ph": "f", "id": 124, "pid": 2338710, "tid": 2379450, "ts": 6345937265159.766, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937265162.678, "dur": 4.265, + "args": { + "External id": 978596,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 1187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937265165.414, "dur": 1.434, + "args": { + "External id": 978597,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 1188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937265175.882, "dur": 220.547, + "args": { + "External id": 978598,"Record function id": 0, "Sequence number": 10552341, "Fwd thread id": 1, "Ev Idx": 1189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937265176.658, "dur": 210.507, + "args": { + "External id": 978599,"Sequence number": 10552341, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 1190 + } + }, + { + "ph": "f", "id": 125, "pid": 2338710, "tid": 2379450, "ts": 6345937265176.658, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937265180.984, "dur": 7.968, + "args": { + "External id": 978600,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 1191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937265183.453, "dur": 4.662, + "args": { + "External id": 978601,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 1192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937265185.797, "dur": 2.006, + "args": { + "External id": 978602,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 1193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937265190.878, "dur": 93.885, + "args": { + "External id": 978603,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 1194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937265286.734, "dur": 16.775, + "args": { + "External id": 978604,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937265295.809, "dur": 6.660, + "args": { + "External id": 978605,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 1196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937265297.325, "dur": 4.934, + "args": { + "External id": 978606,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 1197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937265305.676, "dur": 6.473, + "args": { + "External id": 978607,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937265307.091, "dur": 4.523, + "args": { + "External id": 978608,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 1199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937265310.961, "dur": 0.566, + "args": { + "External id": 978609,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 1200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937265312.919, "dur": 73.184, + "args": { + "External id": 978610,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 1201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937265402.924, "dur": 10.413, + "args": { + "External id": 978611,"Record function id": 0, "Sequence number": 10552340, "Fwd thread id": 1, "Ev Idx": 1202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937265404.030, "dur": 7.179, + "args": { + "External id": 978612,"Sequence number": 10552340, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1203 + } + }, + { + "ph": "f", "id": 126, "pid": 2338710, "tid": 2379450, "ts": 6345937265404.030, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937265405.749, "dur": 5.299, + "args": { + "External id": 978613,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937265409.363, "dur": 1.577, + "args": { + "External id": 978614,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937265417.343, "dur": 10.626, + "args": { + "External id": 978615,"Record function id": 0, "Sequence number": 10552339, "Fwd thread id": 1, "Ev Idx": 1206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937265418.496, "dur": 7.202, + "args": { + "External id": 978616,"Sequence number": 10552339, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1207 + } + }, + { + "ph": "f", "id": 127, "pid": 2338710, "tid": 2379450, "ts": 6345937265418.496, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937265419.709, "dur": 5.727, + "args": { + "External id": 978617,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937265420.660, "dur": 4.235, + "args": { + "External id": 978618,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 1209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937265424.104, "dur": 0.613, + "args": { + "External id": 978619,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 1210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937265434.791, "dur": 13.256, + "args": { + "External id": 978620,"Record function id": 0, "Ev Idx": 1211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937265436.555, "dur": 10.508, + "args": { + "External id": 978621,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937265439.541, "dur": 7.049, + "args": { + "External id": 978622,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937265443.482, "dur": 2.980, + "args": { + "External id": 978623,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937265451.730, "dur": 6.225, + "args": { + "External id": 978624,"Record function id": 0, "Sequence number": 10552338, "Fwd thread id": 1, "Ev Idx": 1215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937265452.869, "dur": 3.151, + "args": { + "External id": 978625,"Sequence number": 10552338, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 1216 + } + }, + { + "ph": "f", "id": 128, "pid": 2338710, "tid": 2379450, "ts": 6345937265452.869, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937265454.108, "dur": 1.728, + "args": { + "External id": 978626,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 1217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937265454.700, "dur": 0.984, + "args": { + "External id": 978627,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 1218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937265461.559, "dur": 109.755, + "args": { + "External id": 978628,"Record function id": 0, "Sequence number": 10552337, "Fwd thread id": 1, "Ev Idx": 1219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937265462.380, "dur": 102.383, + "args": { + "External id": 978629,"Sequence number": 10552337, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 1220 + } + }, + { + "ph": "f", "id": 129, "pid": 2338710, "tid": 2379450, "ts": 6345937265462.380, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937265464.458, "dur": 6.005, + "args": { + "External id": 978630,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 1221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937265465.274, "dur": 4.709, + "args": { + "External id": 978631,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 1222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937265469.242, "dur": 0.623, + "args": { + "External id": 978632,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 1223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937265471.180, "dur": 37.519, + "args": { + "External id": 978633,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 1224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937265510.221, "dur": 6.148, + "args": { + "External id": 978634,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937265510.901, "dur": 4.915, + "args": { + "External id": 978635,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 1226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937265514.665, "dur": 1.020, + "args": { + "External id": 978636,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 1227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937265517.624, "dur": 5.729, + "args": { + "External id": 978637,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937265521.734, "dur": 1.117, + "args": { + "External id": 978638,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 1229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937265522.329, "dur": 0.447, + "args": { + "External id": 978639,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 1230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937265523.866, "dur": 40.001, + "args": { + "External id": 978640,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 1231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937265576.413, "dur": 38.515, + "args": { + "External id": 978641,"Record function id": 0, "Sequence number": 10552336, "Fwd thread id": 1, "Ev Idx": 1232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937265577.503, "dur": 5.964, + "args": { + "External id": 978642,"Sequence number": 10552336, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1233 + } + }, + { + "ph": "f", "id": 130, "pid": 2338710, "tid": 2379450, "ts": 6345937265577.503, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937265578.737, "dur": 4.575, + "args": { + "External id": 978643,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937265581.903, "dur": 1.259, + "args": { + "External id": 978644,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338710, "tid": 2379450, + "ts": 6345937265587.074, "dur": 24.876, + "args": { + "External id": 978645,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937265619.780, "dur": 10.349, + "args": { + "External id": 978646,"Record function id": 0, "Sequence number": 10552335, "Fwd thread id": 1, "Ev Idx": 1237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937265623.317, "dur": 4.986, + "args": { + "External id": 978647,"Sequence number": 10552335, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1238 + } + }, + { + "ph": "f", "id": 131, "pid": 2338710, "tid": 2379450, "ts": 6345937265623.317, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937265624.294, "dur": 3.749, + "args": { + "External id": 978648,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937265625.430, "dur": 2.019, + "args": { + "External id": 978649,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 1240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937265626.807, "dur": 0.476, + "args": { + "External id": 978650,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 1241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937265634.774, "dur": 8.346, + "args": { + "External id": 978651,"Record function id": 0, "Ev Idx": 1242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937265636.191, "dur": 6.335, + "args": { + "External id": 978652,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937265637.574, "dur": 4.623, + "args": { + "External id": 978653,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937265638.357, "dur": 3.701, + "args": { + "External id": 978654,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937265646.701, "dur": 10.564, + "args": { + "External id": 978655,"Record function id": 0, "Sequence number": 10552334, "Fwd thread id": 1, "Ev Idx": 1246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937265647.913, "dur": 7.550, + "args": { + "External id": 978656,"Sequence number": 10552334, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1247 + } + }, + { + "ph": "f", "id": 132, "pid": 2338710, "tid": 2379450, "ts": 6345937265647.913, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937265651.250, "dur": 4.040, + "args": { + "External id": 978657,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937265654.058, "dur": 1.090, + "args": { + "External id": 978658,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937265660.826, "dur": 106.651, + "args": { + "External id": 978659,"Record function id": 0, "Sequence number": 10552333, "Fwd thread id": 1, "Ev Idx": 1250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937265661.683, "dur": 97.960, + "args": { + "External id": 978660,"Sequence number": 10552333, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1251 + } + }, + { + "ph": "f", "id": 133, "pid": 2338710, "tid": 2379450, "ts": 6345937265661.683, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937265664.094, "dur": 2.695, + "args": { + "External id": 978661,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937265664.897, "dur": 1.313, + "args": { + "External id": 978662,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 1253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937265665.677, "dur": 0.389, + "args": { + "External id": 978663,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 1254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937265669.837, "dur": 38.481, + "args": { + "External id": 978664,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 1255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937265709.690, "dur": 3.127, + "args": { + "External id": 978665,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937265710.177, "dur": 2.016, + "args": { + "External id": 978666,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937265711.375, "dur": 0.673, + "args": { + "External id": 978667,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937265713.930, "dur": 7.744, + "args": { + "External id": 978668,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937265715.149, "dur": 6.026, + "args": { + "External id": 978669,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937265720.552, "dur": 0.537, + "args": { + "External id": 978670,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937265722.263, "dur": 36.451, + "args": { + "External id": 978671,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 1262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937265772.995, "dur": 27.628, + "args": { + "External id": 978672,"Record function id": 0, "Sequence number": 10552332, "Fwd thread id": 1, "Ev Idx": 1263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937265773.990, "dur": 3.768, + "args": { + "External id": 978673,"Sequence number": 10552332, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1264 + } + }, + { + "ph": "f", "id": 134, "pid": 2338710, "tid": 2379450, "ts": 6345937265773.990, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937265775.535, "dur": 2.059, + "args": { + "External id": 978674,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937265776.484, "dur": 0.966, + "args": { + "External id": 978675,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345937265780.581, "dur": 17.207, + "args": { + "External id": 978676,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937265804.981, "dur": 15.916, + "args": { + "External id": 978677,"Record function id": 0, "Sequence number": 10552331, "Fwd thread id": 1, "Ev Idx": 1268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937265806.214, "dur": 12.240, + "args": { + "External id": 978678,"Sequence number": 10552331, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1269 + } + }, + { + "ph": "f", "id": 135, "pid": 2338710, "tid": 2379450, "ts": 6345937265806.214, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937265807.460, "dur": 10.758, + "args": { + "External id": 978679,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937265808.437, "dur": 9.130, + "args": { + "External id": 978680,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937265814.752, "dur": 2.665, + "args": { + "External id": 978681,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937265825.320, "dur": 5.522, + "args": { + "External id": 978682,"Record function id": 0, "Ev Idx": 1273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937265826.896, "dur": 3.307, + "args": { + "External id": 978683,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937265828.052, "dur": 1.816, + "args": { + "External id": 978684,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937265828.716, "dur": 1.017, + "args": { + "External id": 978685,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937265837.787, "dur": 486.173, + "args": { + "External id": 978686,"Record function id": 0, "Sequence number": 10552330, "Fwd thread id": 1, "Ev Idx": 1277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937265838.918, "dur": 449.777, + "args": { + "External id": 978687,"Sequence number": 10552330, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1278 + } + }, + { + "ph": "f", "id": 136, "pid": 2338710, "tid": 2379450, "ts": 6345937265838.918, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937265876.588, "dur": 2.701, + "args": { + "External id": 978688,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937265877.825, "dur": 1.307, + "args": { + "External id": 978689,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937265898.041, "dur": 4.500, + "args": { + "External id": 978690,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937265913.411, "dur": 2.633, + "args": { + "External id": 978691,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937266161.720, "dur": 3.637, + "args": { + "External id": 978692,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 1283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345937266170.637, "dur": 42.373, + "args": { + "External id": 978693,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 1284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937266184.644, "dur": 1.036, + "args": { + "External id": 978694,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 1285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345937266219.870, "dur": 36.608, + "args": { + "External id": 978695,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 1286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345937266221.939, "dur": 34.290, + "args": { + "External id": 978696,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 1287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937266229.729, "dur": 5.989, + "args": { + "External id": 978697,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937266237.772, "dur": 17.879, + "args": { + "External id": 978698,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 1289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2379450, + "ts": 6345937266265.015, "dur": 5.317, + "args": { + "External id": 978699,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 1290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937266266.368, "dur": 3.844, + "args": { + "External id": 978700,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 1291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937266277.919, "dur": 1.905, + "args": { + "External id": 978701,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937266278.779, "dur": 0.916, + "args": { + "External id": 978702,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345937266301.099, "dur": 18.269, + "args": { + "External id": 978703,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937266339.999, "dur": 15.059, + "args": { + "External id": 978704,"Record function id": 0, "Ev Idx": 1295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937266344.645, "dur": 9.197, + "args": { + "External id": 978705,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937266347.300, "dur": 5.406, + "args": { + "External id": 978706,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937266350.552, "dur": 2.048, + "args": { + "External id": 978707,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937266359.887, "dur": 6.306, + "args": { + "External id": 978708,"Record function id": 0, "Sequence number": 10552329, "Fwd thread id": 1, "Ev Idx": 1299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937266361.432, "dur": 1.587, + "args": { + "External id": 978709,"Sequence number": 10552329, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1300 + } + }, + { + "ph": "f", "id": 137, "pid": 2338710, "tid": 2379450, "ts": 6345937266361.432, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937266370.531, "dur": 474.836, + "args": { + "External id": 978710,"Record function id": 0, "Sequence number": 10552328, "Fwd thread id": 1, "Ev Idx": 1301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937266371.796, "dur": 463.053, + "args": { + "External id": 978711,"Sequence number": 10552328, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1302 + } + }, + { + "ph": "f", "id": 138, "pid": 2338710, "tid": 2379450, "ts": 6345937266371.796, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937266404.832, "dur": 8.668, + "args": { + "External id": 978712,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338710, "tid": 2379450, + "ts": 6345937266409.654, "dur": 3.514, + "args": { + "External id": 978713,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]", "[16384, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[67108864, 16384, 1], [], []], "Input Dims": [[8, 4096, 4096], [], []], "Ev Idx": 1304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937266416.980, "dur": 9.413, + "args": { + "External id": 978714,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937266420.977, "dur": 4.714, + "args": { + "External id": 978715,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 1306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937266424.938, "dur": 0.573, + "args": { + "External id": 978716,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 1307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2379450, + "ts": 6345937266430.453, "dur": 105.723, + "args": { + "External id": 978717,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16384, 1], [1, 14336], []], "Input Dims": [[32768, 4096], [14336, 4096], []], "Ev Idx": 1308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937266431.782, "dur": 3.345, + "args": { + "External id": 978718,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 14336]], "Input Dims": [[14336, 4096]], "Ev Idx": 1309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937266432.742, "dur": 1.837, + "args": { + "External id": 978719,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 14336], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 1310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937266433.919, "dur": 0.562, + "args": { + "External id": 978720,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[14336, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 14336], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 1311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2379450, + "ts": 6345937266438.919, "dur": 96.650, + "args": { + "External id": 978721,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 1312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937266440.694, "dur": 93.789, + "args": { + "External id": 978722,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 1313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2379450, + "ts": 6345937266540.422, "dur": 2.845, + "args": { + "External id": 978723,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [58720256, 14336, 1]], "Input Dims": [[32768, 14336], [8, 4096, 14336]], "Ev Idx": 1314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937266541.594, "dur": 1.532, + "args": { + "External id": 978724,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 1315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937266583.486, "dur": 4.568, + "args": { + "External id": 978725,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937266591.274, "dur": 7.419, + "args": { + "External id": 978726,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937266599.770, "dur": 2.341, + "args": { + "External id": 978727,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937266643.693, "dur": 2.543, + "args": { + "External id": 978728,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937266644.711, "dur": 1.342, + "args": { + "External id": 978729,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 2338710, "tid": 2379450, + "ts": 6345937266669.873, "dur": 145.271, + "args": { + "External id": 978730,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[16384, 1], [14336, 1]], []], "Input Dims": [[], [[32768, 4096], [32768, 14336]], []], "Ev Idx": 1321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2379450, + "ts": 6345937266675.824, "dur": 8.144, + "args": { + "External id": 978731,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937266682.200, "dur": 0.987, + "args": { + "External id": 978732,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096, 1]", "[16384, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 1323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338710, "tid": 2379450, + "ts": 6345937266685.649, "dur": 9.098, + "args": { + "External id": 978733,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16384, 1, 1], []], "Input Dims": [[32768, 4096, 1], []], "Ev Idx": 1324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937266692.933, "dur": 0.861, + "args": { + "External id": 978734,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 32768]", "[1, 1, 16384]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1, 1], [], [], []], "Input Dims": [[32768, 4096, 1], [], [], []], "Ev Idx": 1325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2379450, + "ts": 6345937266696.639, "dur": 1.782, + "args": { + "External id": 978735,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 1326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937266697.615, "dur": 0.414, + "args": { + "External id": 978736,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 1327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338710, "tid": 2379450, + "ts": 6345937266701.649, "dur": 2.623, + "args": { + "External id": 978737,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 1328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937266702.949, "dur": 0.772, + "args": { + "External id": 978738,"Record function id": 0, "Concrete Inputs": ["", "[1, 14336, 32768]", "[1, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1, 1], [], [], []], "Input Dims": [[32768, 14336, 1], [], [], []], "Ev Idx": 1329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338710, "tid": 2379450, + "ts": 6345937266713.401, "dur": 2.478, + "args": { + "External id": 978739,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 16384], []], "Input Dims": [[4096, 1, 32768], []], "Ev Idx": 1330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937266714.922, "dur": 0.579, + "args": { + "External id": 978740,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768, 1]", "[1, 16384, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 16384], [], [], []], "Input Dims": [[4096, 1, 32768], [], [], []], "Ev Idx": 1331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937266716.764, "dur": 9.778, + "args": { + "External id": 978741,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 16384, 1], []], "Input Dims": [[4096, 32768, 1], []], "Ev Idx": 1332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338710, "tid": 2379450, + "ts": 6345937266721.925, "dur": 4.411, + "args": { + "External id": 978742,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]", "[4096, 1, 16384]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 16384, 1], [], []], "Input Dims": [[4096, 32768, 1], [], []], "Ev Idx": 1333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338710, "tid": 2379450, + "ts": 6345937266727.527, "dur": 4.102, + "args": { + "External id": 978743,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 14336], []], "Input Dims": [[1, 14336, 32768], []], "Ev Idx": 1334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937266730.643, "dur": 0.626, + "args": { + "External id": 978744,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 14336], [], [], []], "Input Dims": [[1, 14336, 32768], [], [], []], "Ev Idx": 1335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937266732.087, "dur": 4.018, + "args": { + "External id": 978745,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 1336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937266732.721, "dur": 3.281, + "args": { + "External id": 978746,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 1337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345937266737.484, "dur": 57.604, + "args": { + "External id": 978747,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1, 16384], [469762048, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336]], "Ev Idx": 1338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937266799.645, "dur": 1.414, + "args": { + "External id": 978748,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[1, 4096, 14336], []], "Ev Idx": 1339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338710, "tid": 2379450, + "ts": 6345937266804.802, "dur": 4.980, + "args": { + "External id": 978749,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 14336, 1], []], "Input Dims": [[4096, 1, 14336], []], "Ev Idx": 1340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937266808.613, "dur": 0.494, + "args": { + "External id": 978750,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336, 1]", "[14336, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 14336, 1], [], [], []], "Input Dims": [[4096, 1, 14336], [], [], []], "Ev Idx": 1341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937266812.749, "dur": 1.121, + "args": { + "External id": 978751,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 14336], []], "Input Dims": [[4096, 14336, 1], []], "Ev Idx": 1342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937266855.432, "dur": 13.147, + "args": { + "External id": 978752,"Record function id": 0, "Ev Idx": 1343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937266857.584, "dur": 10.015, + "args": { + "External id": 978753,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937266859.140, "dur": 7.571, + "args": { + "External id": 978754,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937266864.372, "dur": 2.190, + "args": { + "External id": 978755,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937266872.935, "dur": 9.410, + "args": { + "External id": 978756,"Record function id": 0, "Sequence number": 10552327, "Fwd thread id": 1, "Ev Idx": 1347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937266873.830, "dur": 6.411, + "args": { + "External id": 978757,"Sequence number": 10552327, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 1348 + } + }, + { + "ph": "f", "id": 139, "pid": 2338710, "tid": 2379450, "ts": 6345937266873.830, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937266878.198, "dur": 1.839, + "args": { + "External id": 978758,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937266878.882, "dur": 0.981, + "args": { + "External id": 978759,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937266886.370, "dur": 149.779, + "args": { + "External id": 978760,"Record function id": 0, "Sequence number": 10552326, "Fwd thread id": 1, "Ev Idx": 1351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937266887.158, "dur": 114.968, + "args": { + "External id": 978761,"Sequence number": 10552326, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 1352 + } + }, + { + "ph": "f", "id": 140, "pid": 2338710, "tid": 2379450, "ts": 6345937266887.158, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937266889.588, "dur": 9.234, + "args": { + "External id": 978762,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 1353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937266891.032, "dur": 7.165, + "args": { + "External id": 978763,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 1354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937266895.047, "dur": 2.960, + "args": { + "External id": 978764,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 1355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937266899.929, "dur": 46.887, + "args": { + "External id": 978765,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 1356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937266948.412, "dur": 6.868, + "args": { + "External id": 978766,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937266949.496, "dur": 5.041, + "args": { + "External id": 978767,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 1358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937266953.650, "dur": 0.746, + "args": { + "External id": 978768,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 1359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937266957.073, "dur": 5.062, + "args": { + "External id": 978769,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937266958.075, "dur": 3.560, + "args": { + "External id": 978770,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 1361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937266961.041, "dur": 0.509, + "args": { + "External id": 978771,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 1362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937266963.018, "dur": 38.043, + "args": { + "External id": 978772,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 1363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937267045.346, "dur": 49.585, + "args": { + "External id": 978773,"Record function id": 0, "Sequence number": 10552325, "Fwd thread id": 1, "Ev Idx": 1364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937267046.726, "dur": 45.084, + "args": { + "External id": 978774,"Sequence number": 10552325, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1365 + } + }, + { + "ph": "f", "id": 141, "pid": 2338710, "tid": 2379450, "ts": 6345937267046.726, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937267048.727, "dur": 42.824, + "args": { + "External id": 978775,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937267052.059, "dur": 38.917, + "args": { + "External id": 978776,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937267101.507, "dur": 11.319, + "args": { + "External id": 978777,"Record function id": 0, "Sequence number": 10552324, "Fwd thread id": 1, "Ev Idx": 1368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937267102.531, "dur": 8.079, + "args": { + "External id": 978778,"Sequence number": 10552324, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1369 + } + }, + { + "ph": "f", "id": 142, "pid": 2338710, "tid": 2379450, "ts": 6345937267102.531, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937267103.451, "dur": 6.878, + "args": { + "External id": 978779,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937267104.624, "dur": 5.080, + "args": { + "External id": 978780,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 1371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937267108.764, "dur": 0.760, + "args": { + "External id": 978781,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 1372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937267117.826, "dur": 9.366, + "args": { + "External id": 978782,"Record function id": 0, "Ev Idx": 1373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937267119.399, "dur": 7.136, + "args": { + "External id": 978783,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937267121.260, "dur": 4.915, + "args": { + "External id": 978784,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937267122.154, "dur": 3.909, + "args": { + "External id": 978785,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937267130.796, "dur": 7.776, + "args": { + "External id": 978786,"Record function id": 0, "Sequence number": 10552323, "Fwd thread id": 1, "Ev Idx": 1377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937267131.714, "dur": 5.015, + "args": { + "External id": 978787,"Sequence number": 10552323, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 1378 + } + }, + { + "ph": "f", "id": 143, "pid": 2338710, "tid": 2379450, "ts": 6345937267131.714, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937267135.042, "dur": 1.485, + "args": { + "External id": 978788,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937267135.570, "dur": 0.809, + "args": { + "External id": 978789,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937267142.277, "dur": 120.782, + "args": { + "External id": 978790,"Record function id": 0, "Sequence number": 10552322, "Fwd thread id": 1, "Ev Idx": 1381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937267146.182, "dur": 107.190, + "args": { + "External id": 978791,"Sequence number": 10552322, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 1382 + } + }, + { + "ph": "f", "id": 144, "pid": 2338710, "tid": 2379450, "ts": 6345937267146.182, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937267148.523, "dur": 3.170, + "args": { + "External id": 978792,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 1383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937267149.481, "dur": 1.660, + "args": { + "External id": 978793,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 1384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937267150.241, "dur": 0.738, + "args": { + "External id": 978794,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 1385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937267152.774, "dur": 46.942, + "args": { + "External id": 978795,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 1386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937267201.192, "dur": 6.756, + "args": { + "External id": 978796,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937267202.216, "dur": 5.058, + "args": { + "External id": 978797,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 1388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937267206.122, "dur": 1.006, + "args": { + "External id": 978798,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 1389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937267209.221, "dur": 5.498, + "args": { + "External id": 978799,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937267209.995, "dur": 4.189, + "args": { + "External id": 978800,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 1391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937267213.665, "dur": 0.442, + "args": { + "External id": 978801,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 1392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937267215.282, "dur": 36.972, + "args": { + "External id": 978802,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 1393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937267270.815, "dur": 39.683, + "args": { + "External id": 978803,"Record function id": 0, "Sequence number": 10552321, "Fwd thread id": 1, "Ev Idx": 1394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937267271.904, "dur": 6.031, + "args": { + "External id": 978804,"Sequence number": 10552321, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1395 + } + }, + { + "ph": "f", "id": 145, "pid": 2338710, "tid": 2379450, "ts": 6345937267271.904, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937267275.589, "dur": 2.153, + "args": { + "External id": 978805,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937267276.369, "dur": 1.251, + "args": { + "External id": 978806,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338710, "tid": 2379450, + "ts": 6345937267281.346, "dur": 25.649, + "args": { + "External id": 978807,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937267314.988, "dur": 10.361, + "args": { + "External id": 978808,"Record function id": 0, "Sequence number": 10552320, "Fwd thread id": 1, "Ev Idx": 1399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937267316.224, "dur": 7.598, + "args": { + "External id": 978809,"Sequence number": 10552320, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1400 + } + }, + { + "ph": "f", "id": 146, "pid": 2338710, "tid": 2379450, "ts": 6345937267316.224, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937267317.221, "dur": 6.332, + "args": { + "External id": 978810,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937267318.498, "dur": 4.457, + "args": { + "External id": 978811,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 1402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937267322.117, "dur": 0.737, + "args": { + "External id": 978812,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 1403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937267329.685, "dur": 7.799, + "args": { + "External id": 978813,"Record function id": 0, "Ev Idx": 1404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937267330.900, "dur": 6.005, + "args": { + "External id": 978814,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937267331.983, "dur": 4.626, + "args": { + "External id": 978815,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937267335.057, "dur": 1.442, + "args": { + "External id": 978816,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937267341.861, "dur": 469.058, + "args": { + "External id": 978817,"Record function id": 0, "Sequence number": 10552319, "Fwd thread id": 1, "Ev Idx": 1408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937267343.093, "dur": 430.824, + "args": { + "External id": 978818,"Sequence number": 10552319, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 1409 + } + }, + { + "ph": "f", "id": 147, "pid": 2338710, "tid": 2379450, "ts": 6345937267343.093, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338710, "tid": 2379450, + "ts": 6345937267370.079, "dur": 36.785, + "args": { + "External id": 978819,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338710, "tid": 2379450, + "ts": 6345937267371.775, "dur": 34.800, + "args": { + "External id": 978820,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2379450, + "ts": 6345937267374.947, "dur": 6.890, + "args": { + "External id": 978821,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 1412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937267377.864, "dur": 3.318, + "args": { + "External id": 978822,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937267383.340, "dur": 22.676, + "args": { + "External id": 978823,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937267421.803, "dur": 2.528, + "args": { + "External id": 978824,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937267422.639, "dur": 1.542, + "args": { + "External id": 978825,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937267431.304, "dur": 1.936, + "args": { + "External id": 978826,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937267432.283, "dur": 0.849, + "args": { + "External id": 978827,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937267447.390, "dur": 2.535, + "args": { + "External id": 978828,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937267462.453, "dur": 2.202, + "args": { + "External id": 978829,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937267650.639, "dur": 4.717, + "args": { + "External id": 978830,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 1421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345937267662.394, "dur": 38.735, + "args": { + "External id": 978831,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 1422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937267675.324, "dur": 1.051, + "args": { + "External id": 978832,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 1423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345937267707.923, "dur": 31.562, + "args": { + "External id": 978833,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 1424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345937267709.850, "dur": 29.381, + "args": { + "External id": 978834,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 1425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937267716.925, "dur": 4.535, + "args": { + "External id": 978835,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937267723.032, "dur": 15.650, + "args": { + "External id": 978836,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 1427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2379450, + "ts": 6345937267743.979, "dur": 2.671, + "args": { + "External id": 978837,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 1428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937267745.326, "dur": 1.193, + "args": { + "External id": 978838,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 1429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937267755.850, "dur": 2.082, + "args": { + "External id": 978839,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937267756.662, "dur": 1.155, + "args": { + "External id": 978840,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937267760.146, "dur": 3.911, + "args": { + "External id": 978841,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937267762.679, "dur": 1.271, + "args": { + "External id": 978842,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345937267791.035, "dur": 18.189, + "args": { + "External id": 978843,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937267823.184, "dur": 8.877, + "args": { + "External id": 978844,"Record function id": 0, "Ev Idx": 1435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937267825.139, "dur": 5.978, + "args": { + "External id": 978845,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937267827.289, "dur": 2.723, + "args": { + "External id": 978846,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937267828.182, "dur": 1.697, + "args": { + "External id": 978847,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937267836.083, "dur": 11.310, + "args": { + "External id": 978848,"Record function id": 0, "Sequence number": 10552318, "Fwd thread id": 1, "Ev Idx": 1439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937267837.019, "dur": 7.138, + "args": { + "External id": 978849,"Sequence number": 10552318, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1440 + } + }, + { + "ph": "f", "id": 148, "pid": 2338710, "tid": 2379450, "ts": 6345937267837.019, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937267841.270, "dur": 2.705, + "args": { + "External id": 978850,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937267842.084, "dur": 1.699, + "args": { + "External id": 978851,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937267851.381, "dur": 180.096, + "args": { + "External id": 978852,"Record function id": 0, "Sequence number": 10552317, "Fwd thread id": 1, "Ev Idx": 1443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937267852.319, "dur": 149.006, + "args": { + "External id": 978853,"Sequence number": 10552317, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1444 + } + }, + { + "ph": "f", "id": 149, "pid": 2338710, "tid": 2379450, "ts": 6345937267852.319, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937267855.487, "dur": 7.478, + "args": { + "External id": 978854,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937267856.882, "dur": 5.406, + "args": { + "External id": 978855,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 1446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937267860.964, "dur": 1.127, + "args": { + "External id": 978856,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 1447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937267864.039, "dur": 74.848, + "args": { + "External id": 978857,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 1448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937267940.683, "dur": 6.295, + "args": { + "External id": 978858,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937267941.579, "dur": 4.661, + "args": { + "External id": 978859,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937267945.171, "dur": 0.866, + "args": { + "External id": 978860,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937267948.824, "dur": 8.379, + "args": { + "External id": 978861,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937267950.104, "dur": 6.541, + "args": { + "External id": 978862,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937267953.350, "dur": 3.154, + "args": { + "External id": 978863,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937267957.912, "dur": 42.265, + "args": { + "External id": 978864,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 1455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937268040.438, "dur": 11.573, + "args": { + "External id": 978865,"Record function id": 0, "Sequence number": 10552316, "Fwd thread id": 1, "Ev Idx": 1456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937268041.872, "dur": 8.329, + "args": { + "External id": 978866,"Sequence number": 10552316, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1457 + } + }, + { + "ph": "f", "id": 150, "pid": 2338710, "tid": 2379450, "ts": 6345937268041.872, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937268044.160, "dur": 5.768, + "args": { + "External id": 978867,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937268047.478, "dur": 2.290, + "args": { + "External id": 978868,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937268102.680, "dur": 13.697, + "args": { + "External id": 978869,"Record function id": 0, "Sequence number": 10552315, "Fwd thread id": 1, "Ev Idx": 1460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937268104.298, "dur": 9.577, + "args": { + "External id": 978870,"Sequence number": 10552315, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1461 + } + }, + { + "ph": "f", "id": 151, "pid": 2338710, "tid": 2379450, "ts": 6345937268104.298, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937268105.427, "dur": 8.132, + "args": { + "External id": 978871,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937268109.624, "dur": 3.163, + "args": { + "External id": 978872,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937268111.592, "dur": 0.955, + "args": { + "External id": 978873,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937268121.728, "dur": 7.561, + "args": { + "External id": 978874,"Record function id": 0, "Ev Idx": 1465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937268123.193, "dur": 5.460, + "args": { + "External id": 978875,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937268125.319, "dur": 2.988, + "args": { + "External id": 978876,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937268126.162, "dur": 2.033, + "args": { + "External id": 978877,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937268132.719, "dur": 6.257, + "args": { + "External id": 978878,"Record function id": 0, "Sequence number": 10552314, "Fwd thread id": 1, "Ev Idx": 1469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937268133.489, "dur": 3.335, + "args": { + "External id": 978879,"Sequence number": 10552314, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1470 + } + }, + { + "ph": "f", "id": 152, "pid": 2338710, "tid": 2379450, "ts": 6345937268133.489, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937268134.582, "dur": 2.067, + "args": { + "External id": 978880,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937268135.005, "dur": 1.524, + "args": { + "External id": 978881,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937268146.033, "dur": 401.517, + "args": { + "External id": 978882,"Record function id": 0, "Sequence number": 10552313, "Fwd thread id": 1, "Ev Idx": 1473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937268147.467, "dur": 380.172, + "args": { + "External id": 978883,"Sequence number": 10552313, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 1474 + } + }, + { + "ph": "f", "id": 153, "pid": 2338710, "tid": 2379450, "ts": 6345937268147.467, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2379450, + "ts": 6345937268164.655, "dur": 8.591, + "args": { + "External id": 978884,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 1475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937268167.578, "dur": 5.212, + "args": { + "External id": 978885,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2379450, + "ts": 6345937268175.483, "dur": 6.107, + "args": { + "External id": 978886,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 1477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937268178.962, "dur": 2.386, + "args": { + "External id": 978887,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2379450, + "ts": 6345937268183.020, "dur": 5.884, + "args": { + "External id": 978888,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 1479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937268184.297, "dur": 4.241, + "args": { + "External id": 978889,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345937268224.457, "dur": 275.382, + "args": { + "External id": 978890,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937268317.254, "dur": 7.782, + "args": { + "External id": 978891,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937268326.931, "dur": 4.587, + "args": { + "External id": 978892,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937268332.584, "dur": 1.648, + "args": { + "External id": 978893,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937268335.076, "dur": 1.840, + "args": { + "External id": 978894,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937268388.299, "dur": 5.369, + "args": { + "External id": 978895,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937268392.015, "dur": 1.497, + "args": { + "External id": 978896,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345937268398.052, "dur": 33.104, + "args": { + "External id": 978897,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 1488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937268405.853, "dur": 0.981, + "args": { + "External id": 978898,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 1489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937268432.384, "dur": 1.297, + "args": { + "External id": 978899,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937268432.958, "dur": 0.614, + "args": { + "External id": 978900,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345937268434.515, "dur": 17.239, + "args": { + "External id": 978901,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 1492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937268436.132, "dur": 2.908, + "args": { + "External id": 978902,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 1493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338710, "tid": 2379450, + "ts": 6345937268513.808, "dur": 3.792, + "args": { + "External id": 978903,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 1494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338710, "tid": 2379450, + "ts": 6345937268520.845, "dur": 0.698, + "args": { + "External id": 978904,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338710, "tid": 2379450, + "ts": 6345937268523.650, "dur": 0.827, + "args": { + "External id": 978905,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937268558.181, "dur": 267.715, + "args": { + "External id": 978906,"Record function id": 0, "Sequence number": 10552312, "Fwd thread id": 1, "Ev Idx": 1497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937268559.836, "dur": 258.624, + "args": { + "External id": 978907,"Sequence number": 10552312, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1498 + } + }, + { + "ph": "f", "id": 154, "pid": 2338710, "tid": 2379450, "ts": 6345937268559.836, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338710, "tid": 2379450, + "ts": 6345937268582.923, "dur": 52.568, + "args": { + "External id": 978908,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937268588.312, "dur": 5.735, + "args": { + "External id": 978909,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937268595.659, "dur": 39.043, + "args": { + "External id": 978910,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], [8, 4096, 8, 128], []], "Ev Idx": 1501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2379450, + "ts": 6345937268646.828, "dur": 4.079, + "args": { + "External id": 978911,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 1502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937268648.211, "dur": 2.400, + "args": { + "External id": 978912,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937268833.801, "dur": 216.967, + "args": { + "External id": 978913,"Record function id": 0, "Sequence number": 10552311, "Fwd thread id": 1, "Ev Idx": 1504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937268835.450, "dur": 206.544, + "args": { + "External id": 978914,"Sequence number": 10552311, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 1505 + } + }, + { + "ph": "f", "id": 155, "pid": 2338710, "tid": 2379450, "ts": 6345937268835.450, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338710, "tid": 2379450, + "ts": 6345937268854.342, "dur": 47.125, + "args": { + "External id": 978915,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937268859.215, "dur": 2.955, + "args": { + "External id": 978916,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937268863.425, "dur": 37.417, + "args": { + "External id": 978917,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], []], "Ev Idx": 1508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2379450, + "ts": 6345937268909.477, "dur": 6.756, + "args": { + "External id": 978918,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 1509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937268911.010, "dur": 4.939, + "args": { + "External id": 978919,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937269103.007, "dur": 22.811, + "args": { + "External id": 978920,"Record function id": 0, "Sequence number": 10552310, "Fwd thread id": 1, "Ev Idx": 1511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937269105.313, "dur": 16.246, + "args": { + "External id": 978921,"Sequence number": 10552310, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1512 + } + }, + { + "ph": "f", "id": 156, "pid": 2338710, "tid": 2379450, "ts": 6345937269105.313, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937269108.564, "dur": 12.641, + "args": { + "External id": 978922,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937269114.327, "dur": 6.571, + "args": { + "External id": 978923,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937269132.349, "dur": 6.318, + "args": { + "External id": 978924,"Record function id": 0, "Sequence number": 10552309, "Fwd thread id": 1, "Ev Idx": 1515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937269133.273, "dur": 3.420, + "args": { + "External id": 978925,"Sequence number": 10552309, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1516 + } + }, + { + "ph": "f", "id": 157, "pid": 2338710, "tid": 2379450, "ts": 6345937269133.273, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937269134.335, "dur": 2.193, + "args": { + "External id": 978926,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937269135.103, "dur": 1.299, + "args": { + "External id": 978927,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937269142.422, "dur": 8.762, + "args": { + "External id": 978928,"Record function id": 0, "Sequence number": 10552308, "Fwd thread id": 1, "Ev Idx": 1519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937269143.181, "dur": 6.173, + "args": { + "External id": 978929,"Sequence number": 10552308, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 1520 + } + }, + { + "ph": "f", "id": 158, "pid": 2338710, "tid": 2379450, "ts": 6345937269143.181, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937269144.143, "dur": 5.022, + "args": { + "External id": 978930,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937269148.125, "dur": 0.895, + "args": { + "External id": 978931,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937269155.078, "dur": 7.778, + "args": { + "External id": 978932,"Record function id": 0, "Sequence number": 10552307, "Fwd thread id": 1, "Ev Idx": 1523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937269155.891, "dur": 4.754, + "args": { + "External id": 978933,"Sequence number": 10552307, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 1524 + } + }, + { + "ph": "f", "id": 159, "pid": 2338710, "tid": 2379450, "ts": 6345937269155.891, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937269156.787, "dur": 3.673, + "args": { + "External id": 978934,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 1525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937269159.297, "dur": 1.050, + "args": { + "External id": 978935,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 1526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937269166.807, "dur": 219.521, + "args": { + "External id": 978936,"Record function id": 0, "Sequence number": 10552306, "Fwd thread id": 1, "Ev Idx": 1527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937269167.940, "dur": 209.278, + "args": { + "External id": 978937,"Sequence number": 10552306, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 1528 + } + }, + { + "ph": "f", "id": 160, "pid": 2338710, "tid": 2379450, "ts": 6345937269167.940, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937269171.583, "dur": 9.069, + "args": { + "External id": 978938,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 1529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937269173.581, "dur": 6.360, + "args": { + "External id": 978939,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 1530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937269175.555, "dur": 4.071, + "args": { + "External id": 978940,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 1531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937269184.704, "dur": 90.478, + "args": { + "External id": 978941,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 1532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937269276.841, "dur": 7.607, + "args": { + "External id": 978942,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937269277.841, "dur": 5.716, + "args": { + "External id": 978943,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 1534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937269282.055, "dur": 1.345, + "args": { + "External id": 978944,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 1535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937269313.132, "dur": 5.448, + "args": { + "External id": 978945,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937269314.458, "dur": 3.643, + "args": { + "External id": 978946,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 1537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937269317.616, "dur": 0.369, + "args": { + "External id": 978947,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 1538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937269319.565, "dur": 56.590, + "args": { + "External id": 978948,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 1539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937269392.575, "dur": 9.693, + "args": { + "External id": 978949,"Record function id": 0, "Sequence number": 10552305, "Fwd thread id": 1, "Ev Idx": 1540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937269393.583, "dur": 6.831, + "args": { + "External id": 978950,"Sequence number": 10552305, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1541 + } + }, + { + "ph": "f", "id": 161, "pid": 2338710, "tid": 2379450, "ts": 6345937269393.583, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937269395.284, "dur": 4.954, + "args": { + "External id": 978951,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937269398.651, "dur": 1.442, + "args": { + "External id": 978952,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937269406.441, "dur": 9.694, + "args": { + "External id": 978953,"Record function id": 0, "Sequence number": 10552304, "Fwd thread id": 1, "Ev Idx": 1544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937269407.211, "dur": 6.513, + "args": { + "External id": 978954,"Sequence number": 10552304, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1545 + } + }, + { + "ph": "f", "id": 162, "pid": 2338710, "tid": 2379450, "ts": 6345937269407.211, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937269408.063, "dur": 5.382, + "args": { + "External id": 978955,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937269411.061, "dur": 1.856, + "args": { + "External id": 978956,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 1547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937269412.047, "dur": 0.703, + "args": { + "External id": 978957,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 1548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937269422.787, "dur": 12.641, + "args": { + "External id": 978958,"Record function id": 0, "Ev Idx": 1549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937269424.490, "dur": 9.948, + "args": { + "External id": 978959,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937269427.382, "dur": 6.619, + "args": { + "External id": 978960,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937269428.715, "dur": 5.173, + "args": { + "External id": 978961,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937269439.280, "dur": 8.373, + "args": { + "External id": 978962,"Record function id": 0, "Sequence number": 10552303, "Fwd thread id": 1, "Ev Idx": 1553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937269440.198, "dur": 5.674, + "args": { + "External id": 978963,"Sequence number": 10552303, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 1554 + } + }, + { + "ph": "f", "id": 163, "pid": 2338710, "tid": 2379450, "ts": 6345937269440.198, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937269444.108, "dur": 1.607, + "args": { + "External id": 978964,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 1555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937269444.611, "dur": 0.966, + "args": { + "External id": 978965,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 1556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937269453.529, "dur": 107.816, + "args": { + "External id": 978966,"Record function id": 0, "Sequence number": 10552302, "Fwd thread id": 1, "Ev Idx": 1557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937269454.371, "dur": 99.147, + "args": { + "External id": 978967,"Sequence number": 10552302, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 1558 + } + }, + { + "ph": "f", "id": 164, "pid": 2338710, "tid": 2379450, "ts": 6345937269454.371, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937269456.748, "dur": 5.980, + "args": { + "External id": 978968,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 1559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937269457.265, "dur": 4.952, + "args": { + "External id": 978969,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 1560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937269461.434, "dur": 0.659, + "args": { + "External id": 978970,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 1561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937269463.570, "dur": 31.579, + "args": { + "External id": 978971,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 1562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937269499.140, "dur": 5.238, + "args": { + "External id": 978972,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937269499.825, "dur": 3.862, + "args": { + "External id": 978973,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 1564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937269500.690, "dur": 2.785, + "args": { + "External id": 978974,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 1565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937269505.788, "dur": 5.403, + "args": { + "External id": 978975,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937269506.547, "dur": 4.127, + "args": { + "External id": 978976,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 1567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937269510.127, "dur": 0.435, + "args": { + "External id": 978977,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 1568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937269513.835, "dur": 38.900, + "args": { + "External id": 978978,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 1569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937269569.482, "dur": 38.988, + "args": { + "External id": 978979,"Record function id": 0, "Sequence number": 10552301, "Fwd thread id": 1, "Ev Idx": 1570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937269570.510, "dur": 3.774, + "args": { + "External id": 978980,"Sequence number": 10552301, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1571 + } + }, + { + "ph": "f", "id": 165, "pid": 2338710, "tid": 2379450, "ts": 6345937269570.510, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937269572.089, "dur": 2.032, + "args": { + "External id": 978981,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937269572.691, "dur": 1.319, + "args": { + "External id": 978982,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338710, "tid": 2379450, + "ts": 6345937269577.896, "dur": 27.462, + "args": { + "External id": 978983,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937269615.038, "dur": 9.990, + "args": { + "External id": 978984,"Record function id": 0, "Sequence number": 10552300, "Fwd thread id": 1, "Ev Idx": 1575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937269615.922, "dur": 6.847, + "args": { + "External id": 978985,"Sequence number": 10552300, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1576 + } + }, + { + "ph": "f", "id": 166, "pid": 2338710, "tid": 2379450, "ts": 6345937269615.922, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937269617.005, "dur": 5.524, + "args": { + "External id": 978986,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937269617.942, "dur": 3.969, + "args": { + "External id": 978987,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 1578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937269621.254, "dur": 0.481, + "args": { + "External id": 978988,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 1579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937269629.795, "dur": 8.320, + "args": { + "External id": 978989,"Record function id": 0, "Ev Idx": 1580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937269631.155, "dur": 6.360, + "args": { + "External id": 978990,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937269632.393, "dur": 4.756, + "args": { + "External id": 978991,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937269635.750, "dur": 1.279, + "args": { + "External id": 978992,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937269641.683, "dur": 5.956, + "args": { + "External id": 978993,"Record function id": 0, "Sequence number": 10552299, "Fwd thread id": 1, "Ev Idx": 1584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937269642.619, "dur": 2.736, + "args": { + "External id": 978994,"Sequence number": 10552299, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1585 + } + }, + { + "ph": "f", "id": 167, "pid": 2338710, "tid": 2379450, "ts": 6345937269642.619, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937269643.655, "dur": 1.527, + "args": { + "External id": 978995,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937269644.185, "dur": 0.844, + "args": { + "External id": 978996,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937269651.419, "dur": 108.290, + "args": { + "External id": 978997,"Record function id": 0, "Sequence number": 10552298, "Fwd thread id": 1, "Ev Idx": 1588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937269652.120, "dur": 99.874, + "args": { + "External id": 978998,"Sequence number": 10552298, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1589 + } + }, + { + "ph": "f", "id": 168, "pid": 2338710, "tid": 2379450, "ts": 6345937269652.120, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937269654.370, "dur": 4.992, + "args": { + "External id": 978999,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937269657.295, "dur": 1.514, + "args": { + "External id": 979000,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 1591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937269658.037, "dur": 0.581, + "args": { + "External id": 979001,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 1592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937269659.891, "dur": 38.785, + "args": { + "External id": 979002,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 1593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937269700.069, "dur": 5.847, + "args": { + "External id": 979003,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937269700.630, "dur": 4.627, + "args": { + "External id": 979004,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937269704.066, "dur": 1.035, + "args": { + "External id": 979005,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937269709.376, "dur": 2.522, + "args": { + "External id": 979006,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937269710.329, "dur": 1.031, + "args": { + "External id": 979007,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937269710.982, "dur": 0.299, + "args": { + "External id": 979008,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937269712.487, "dur": 38.745, + "args": { + "External id": 979009,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 1600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937269764.762, "dur": 32.458, + "args": { + "External id": 979010,"Record function id": 0, "Sequence number": 10552297, "Fwd thread id": 1, "Ev Idx": 1601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937269765.694, "dur": 8.522, + "args": { + "External id": 979011,"Sequence number": 10552297, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1602 + } + }, + { + "ph": "f", "id": 169, "pid": 2338710, "tid": 2379450, "ts": 6345937269765.694, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937269769.213, "dur": 4.831, + "args": { + "External id": 979012,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937269772.724, "dur": 1.209, + "args": { + "External id": 979013,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345937269776.698, "dur": 17.269, + "args": { + "External id": 979014,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937269801.454, "dur": 6.709, + "args": { + "External id": 979015,"Record function id": 0, "Sequence number": 10552296, "Fwd thread id": 1, "Ev Idx": 1606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345937269802.503, "dur": 4.148, + "args": { + "External id": 979016,"Sequence number": 10552296, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1607 + } + }, + { + "ph": "f", "id": 170, "pid": 2338710, "tid": 2379450, "ts": 6345937269802.503, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345937269803.224, "dur": 3.212, + "args": { + "External id": 979017,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345937269803.849, "dur": 1.893, + "args": { + "External id": 979018,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937269804.889, "dur": 0.700, + "args": { + "External id": 979019,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937269812.574, "dur": 5.149, + "args": { + "External id": 979020,"Record function id": 0, "Ev Idx": 1611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937269813.796, "dur": 3.360, + "args": { + "External id": 979021,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937269814.762, "dur": 2.021, + "args": { + "External id": 979022,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937269815.343, "dur": 1.349, + "args": { + "External id": 979023,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937269822.226, "dur": 499.436, + "args": { + "External id": 979024,"Record function id": 0, "Sequence number": 10552295, "Fwd thread id": 1, "Ev Idx": 1615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937269823.489, "dur": 458.514, + "args": { + "External id": 979025,"Sequence number": 10552295, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1616 + } + }, + { + "ph": "f", "id": 171, "pid": 2338710, "tid": 2379450, "ts": 6345937269823.489, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937269865.339, "dur": 1.978, + "args": { + "External id": 979026,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937269865.931, "dur": 1.219, + "args": { + "External id": 979027,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937269883.975, "dur": 4.549, + "args": { + "External id": 979028,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937269899.674, "dur": 2.506, + "args": { + "External id": 979029,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937270144.575, "dur": 4.238, + "args": { + "External id": 979030,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 1621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345937270153.948, "dur": 50.691, + "args": { + "External id": 979031,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 1622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937270174.623, "dur": 1.347, + "args": { + "External id": 979032,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 1623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345937270211.578, "dur": 37.705, + "args": { + "External id": 979033,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 1624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345937270216.273, "dur": 32.737, + "args": { + "External id": 979034,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 1625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937270221.209, "dur": 4.713, + "args": { + "External id": 979035,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937270227.808, "dur": 20.587, + "args": { + "External id": 979036,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 1627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2379450, + "ts": 6345937270255.068, "dur": 2.983, + "args": { + "External id": 979037,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 1628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937270256.416, "dur": 1.504, + "args": { + "External id": 979038,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 1629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937270268.237, "dur": 4.059, + "args": { + "External id": 979039,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937270271.099, "dur": 1.056, + "args": { + "External id": 979040,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345937270297.014, "dur": 18.643, + "args": { + "External id": 979041,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937270338.082, "dur": 10.353, + "args": { + "External id": 979042,"Record function id": 0, "Ev Idx": 1633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937270340.417, "dur": 7.094, + "args": { + "External id": 979043,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937270343.085, "dur": 3.175, + "args": { + "External id": 979044,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937270344.256, "dur": 1.844, + "args": { + "External id": 979045,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937270352.931, "dur": 3269.778, + "args": { + "External id": 979046,"Record function id": 0, "Ev Idx": 1637 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.25)", "pid": 2338710, "tid": 2379450, + "ts": 6345937270389.232, "dur": 1115.652, + "args": { + "External id": 979047,"Record function id": 0, "Ev Idx": 1638 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.24", "pid": 2338710, "tid": 2379450, + "ts": 6345937270415.630, "dur": 1078.666, + "args": { + "External id": 979048,"Record function id": 0, "Ev Idx": 1639 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.24)", "pid": 2338710, "tid": 2379450, + "ts": 6345937270432.524, "dur": 1042.957, + "args": { + "External id": 979049,"Record function id": 0, "Ev Idx": 1640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937270520.525, "dur": 6.240, + "args": { + "External id": 979050,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345937270543.772, "dur": 40.933, + "args": { + "External id": 979051,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 1642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937270552.852, "dur": 1.576, + "args": { + "External id": 979052,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937270557.965, "dur": 0.503, + "args": { + "External id": 979053,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937270559.320, "dur": 0.465, + "args": { + "External id": 979054,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937270562.837, "dur": 1.968, + "args": { + "External id": 979055,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937270565.481, "dur": 2.811, + "args": { + "External id": 979056,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937270570.845, "dur": 0.379, + "args": { + "External id": 979057,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937270573.775, "dur": 0.363, + "args": { + "External id": 979058,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937270574.971, "dur": 0.494, + "args": { + "External id": 979059,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937270578.392, "dur": 0.334, + "args": { + "External id": 979060,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937270597.373, "dur": 46.781, + "args": { + "External id": 979061,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 1652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345937270687.249, "dur": 135.967, + "args": { + "External id": 979062,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 1653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937270699.432, "dur": 4.150, + "args": { + "External id": 979063,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345937270709.612, "dur": 14.383, + "args": { + "External id": 979064,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 1655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345937270713.951, "dur": 9.540, + "args": { + "External id": 979065,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 1656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937270719.557, "dur": 2.386, + "args": { + "External id": 979066,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 1657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345937270731.472, "dur": 32.108, + "args": { + "External id": 979067,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 1658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937270732.892, "dur": 0.517, + "args": { + "External id": 979068,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937270736.395, "dur": 3.137, + "args": { + "External id": 979069,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937270740.354, "dur": 0.352, + "args": { + "External id": 979070,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937270746.132, "dur": 0.444, + "args": { + "External id": 979071,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937270747.679, "dur": 0.586, + "args": { + "External id": 979072,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937270749.006, "dur": 2.173, + "args": { + "External id": 979073,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937270753.138, "dur": 0.505, + "args": { + "External id": 979074,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937270754.520, "dur": 0.511, + "args": { + "External id": 979075,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937270757.573, "dur": 0.323, + "args": { + "External id": 979076,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937270776.780, "dur": 35.113, + "args": { + "External id": 979077,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 1668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345937270888.841, "dur": 460.675, + "args": { + "External id": 979078,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 1669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345937270924.187, "dur": 418.558, + "args": { + "External id": 979079,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1670, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345937270935.393, "dur": 400.107, + "args": { + "External id": 979080,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 1671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345937271380.098, "dur": 2.856, + "args": { + "External id": 979081,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1672, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937271513.777, "dur": 2086.208, + "args": { + "External id": 979082,"Sequence number": 10552294, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1673 + } + }, + { + "ph": "f", "id": 172, "pid": 2338710, "tid": 2379450, "ts": 6345937271513.777, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937271650.330, "dur": 132.160, + "args": { + "External id": 979083,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 1674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345937271833.297, "dur": 46.089, + "args": { + "External id": 979084,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 1675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345937271902.162, "dur": 58.106, + "args": { + "External id": 979085,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 1676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937271971.174, "dur": 35.258, + "args": { + "External id": 979086,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 1677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937272041.331, "dur": 85.603, + "args": { + "External id": 979087,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 1678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937272139.903, "dur": 32.920, + "args": { + "External id": 979088,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 1679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937272184.323, "dur": 34.136, + "args": { + "External id": 979089,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 1680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345937272254.777, "dur": 28.780, + "args": { + "External id": 979090,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 1681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345937272311.240, "dur": 32.260, + "args": { + "External id": 979091,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345937272371.792, "dur": 23.699, + "args": { + "External id": 979092,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 1683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345937272416.261, "dur": 17.467, + "args": { + "External id": 979093,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 1684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937272445.689, "dur": 45.899, + "args": { + "External id": 979094,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 1685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937272495.856, "dur": 38.972, + "args": { + "External id": 979095,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 1686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345937272569.596, "dur": 295.836, + "args": { + "External id": 979096,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937272666.614, "dur": 11.243, + "args": { + "External id": 979097,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937272680.202, "dur": 3.232, + "args": { + "External id": 979098,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937272684.766, "dur": 1.928, + "args": { + "External id": 979099,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937272687.646, "dur": 4.060, + "args": { + "External id": 979100,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937272739.598, "dur": 8.483, + "args": { + "External id": 979101,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937272743.092, "dur": 4.743, + "args": { + "External id": 979102,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345937272750.212, "dur": 36.660, + "args": { + "External id": 979103,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 1694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937272756.506, "dur": 2.078, + "args": { + "External id": 979104,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 1695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937272788.470, "dur": 4.784, + "args": { + "External id": 979105,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937272792.542, "dur": 0.566, + "args": { + "External id": 979106,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345937272794.188, "dur": 18.579, + "args": { + "External id": 979107,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 1698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937272797.147, "dur": 0.664, + "args": { + "External id": 979108,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 1699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345937272907.370, "dur": 39.161, + "args": { + "External id": 979109,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345937272966.931, "dur": 21.980, + "args": { + "External id": 979110,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937272996.829, "dur": 121.026, + "args": { + "External id": 979111,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 1702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937273130.693, "dur": 52.737, + "args": { + "External id": 979112,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 1703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937273197.941, "dur": 26.782, + "args": { + "External id": 979113,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 1704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937273230.971, "dur": 35.400, + "args": { + "External id": 979114,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 1705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937273274.137, "dur": 30.549, + "args": { + "External id": 979115,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 1706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937273312.200, "dur": 32.933, + "args": { + "External id": 979116,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 1707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345937273373.854, "dur": 28.289, + "args": { + "External id": 979117,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 1708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345937273424.481, "dur": 26.919, + "args": { + "External id": 979118,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345937273472.294, "dur": 18.512, + "args": { + "External id": 979119,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 1710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345937273514.482, "dur": 16.130, + "args": { + "External id": 979120,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 1711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345937273546.816, "dur": 17.744, + "args": { + "External id": 979121,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 1712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937273649.508, "dur": 19.656, + "args": { + "External id": 979122,"Record function id": 0, "Ev Idx": 1713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937273653.384, "dur": 14.677, + "args": { + "External id": 979123,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937273658.506, "dur": 8.204, + "args": { + "External id": 979124,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937273660.373, "dur": 6.215, + "args": { + "External id": 979125,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937273673.968, "dur": 5.779, + "args": { + "External id": 979126,"Record function id": 0, "Ev Idx": 1717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937273675.594, "dur": 3.493, + "args": { + "External id": 979127,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937273676.734, "dur": 1.634, + "args": { + "External id": 979128,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937273677.213, "dur": 1.029, + "args": { + "External id": 979129,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937273683.682, "dur": 6.954, + "args": { + "External id": 979130,"Record function id": 0, "Ev Idx": 1721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937273684.888, "dur": 5.270, + "args": { + "External id": 979131,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937273685.467, "dur": 4.199, + "args": { + "External id": 979132,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937273686.033, "dur": 3.451, + "args": { + "External id": 979133,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937273694.788, "dur": 4.473, + "args": { + "External id": 979134,"Record function id": 0, "Ev Idx": 1725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937273695.952, "dur": 2.863, + "args": { + "External id": 979135,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937273696.832, "dur": 1.483, + "args": { + "External id": 979136,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937273697.443, "dur": 0.767, + "args": { + "External id": 979137,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937273703.077, "dur": 14.625, + "args": { + "External id": 979138,"Record function id": 0, "Ev Idx": 1729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937273704.538, "dur": 12.694, + "args": { + "External id": 979139,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937273714.923, "dur": 1.700, + "args": { + "External id": 979140,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937273715.596, "dur": 0.955, + "args": { + "External id": 979141,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937273721.484, "dur": 8.476, + "args": { + "External id": 979142,"Record function id": 0, "Ev Idx": 1733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937273724.536, "dur": 4.944, + "args": { + "External id": 979143,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937273725.282, "dur": 3.535, + "args": { + "External id": 979144,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937273727.982, "dur": 0.689, + "args": { + "External id": 979145,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937273735.701, "dur": 4.559, + "args": { + "External id": 979146,"Record function id": 0, "Ev Idx": 1737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937273737.020, "dur": 2.783, + "args": { + "External id": 979147,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937273737.740, "dur": 1.498, + "args": { + "External id": 979148,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937273738.289, "dur": 0.832, + "args": { + "External id": 979149,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937273744.102, "dur": 4.319, + "args": { + "External id": 979150,"Record function id": 0, "Ev Idx": 1741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937273745.576, "dur": 2.404, + "args": { + "External id": 979151,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937273746.267, "dur": 1.179, + "args": { + "External id": 979152,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937273746.726, "dur": 0.644, + "args": { + "External id": 979153,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937273752.084, "dur": 3.708, + "args": { + "External id": 979154,"Record function id": 0, "Ev Idx": 1745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937273753.135, "dur": 2.182, + "args": { + "External id": 979155,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937273753.684, "dur": 1.131, + "args": { + "External id": 979156,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937273754.115, "dur": 0.623, + "args": { + "External id": 979157,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937273760.370, "dur": 284576.942, + "args": { + "External id": 979158,"Record function id": 0, "Sequence number": 10552293, "Fwd thread id": 1, "Ev Idx": 1749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937273761.874, "dur": 284565.183, + "args": { + "External id": 979159,"Sequence number": 10552293, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1750 + } + }, + { + "ph": "f", "id": 173, "pid": 2338710, "tid": 2379450, "ts": 6345937273761.874, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.25)", "pid": 2338710, "tid": 2379450, + "ts": 6345937273798.954, "dur": 49.169, + "args": { + "External id": 979160,"Record function id": 0, "Ev Idx": 1751 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.25)", "pid": 2338710, "tid": 2379450, + "ts": 6345937273859.868, "dur": 74.542, + "args": { + "External id": 979161,"Record function id": 0, "Ev Idx": 1752 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.25)", "pid": 2338710, "tid": 2379450, + "ts": 6345937273941.393, "dur": 284374.908, + "args": { + "External id": 979162,"Record function id": 0, "Ev Idx": 1753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937274005.274, "dur": 31.560, + "args": { + "External id": 979163,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937274052.872, "dur": 42.747, + "args": { + "External id": 979164,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 1755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345937274119.589, "dur": 283196.782, + "args": { + "External id": 979165,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 1756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345937274137.966, "dur": 283160.745, + "args": { + "External id": 979166,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 1757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937274283.410, "dur": 9.754, + "args": { + "External id": 979167,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345937274317.828, "dur": 282919.961, + "args": { + "External id": 979168,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 1759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345937274321.448, "dur": 282914.958, + "args": { + "External id": 979169,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 1760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937274325.835, "dur": 19.645, + "args": { + "External id": 979170,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937274348.270, "dur": 282880.946, + "args": { + "External id": 979171,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 1762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937557448.514, "dur": 21.787, + "args": { + "External id": 979172,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 1763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937557458.278, "dur": 11.530, + "args": { + "External id": 979173,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345937557517.368, "dur": 369.238, + "args": { + "External id": 979174,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 1765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345937557558.861, "dur": 320.900, + "args": { + "External id": 979175,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1766, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345937557573.017, "dur": 297.151, + "args": { + "External id": 979176,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 1767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345937557911.976, "dur": 2.881, + "args": { + "External id": 979177,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1768, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937557985.644, "dur": 8.387, + "args": { + "External id": 979178,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937558115.284, "dur": 3.746, + "args": { + "External id": 979179,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937558138.490, "dur": 4.721, + "args": { + "External id": 979180,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937558159.064, "dur": 1.186, + "args": { + "External id": 979181,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937558174.447, "dur": 0.933, + "args": { + "External id": 979182,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937558189.486, "dur": 1.084, + "args": { + "External id": 979183,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937558203.486, "dur": 3.733, + "args": { + "External id": 979184,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937558222.244, "dur": 3.253, + "args": { + "External id": 979185,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937558237.688, "dur": 1.044, + "args": { + "External id": 979186,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937558356.117, "dur": 3418.845, + "args": { + "External id": 979187,"Record function id": 0, "Ev Idx": 1778 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.24)", "pid": 2338710, "tid": 2379450, + "ts": 6345937558379.093, "dur": 1271.377, + "args": { + "External id": 979188,"Record function id": 0, "Ev Idx": 1779 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.24)", "pid": 2338710, "tid": 2379450, + "ts": 6345937558396.471, "dur": 390.075, + "args": { + "External id": 979189,"Record function id": 0, "Ev Idx": 1780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937558497.067, "dur": 5.295, + "args": { + "External id": 979190,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 1781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937558505.732, "dur": 1.143, + "args": { + "External id": 979191,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 1782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937558509.043, "dur": 3.313, + "args": { + "External id": 979192,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 1783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937558514.264, "dur": 0.794, + "args": { + "External id": 979193,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937558516.633, "dur": 0.885, + "args": { + "External id": 979194,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937558521.846, "dur": 0.743, + "args": { + "External id": 979195,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 1786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937558525.940, "dur": 2.718, + "args": { + "External id": 979196,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 1787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937558530.235, "dur": 0.995, + "args": { + "External id": 979197,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 1788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937558532.687, "dur": 1.229, + "args": { + "External id": 979198,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 1789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937558537.550, "dur": 1.014, + "args": { + "External id": 979199,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 1790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345937558558.702, "dur": 191.650, + "args": { + "External id": 979200,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 1791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345937558580.418, "dur": 163.951, + "args": { + "External id": 979201,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 1792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937558600.834, "dur": 19.241, + "args": { + "External id": 979202,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345937558625.523, "dur": 81.636, + "args": { + "External id": 979203,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 1794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345937558628.646, "dur": 78.128, + "args": { + "External id": 979204,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 1795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937558634.900, "dur": 7.318, + "args": { + "External id": 979205,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937558644.537, "dur": 61.477, + "args": { + "External id": 979206,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 1797 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.23", "pid": 2338710, "tid": 2379450, + "ts": 6345937558874.451, "dur": 767.042, + "args": { + "External id": 979207,"Record function id": 0, "Ev Idx": 1798 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.23)", "pid": 2338710, "tid": 2379450, + "ts": 6345937558892.746, "dur": 734.537, + "args": { + "External id": 979208,"Record function id": 0, "Ev Idx": 1799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937558954.332, "dur": 6.619, + "args": { + "External id": 979209,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345937558978.123, "dur": 62.268, + "args": { + "External id": 979210,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 1801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937558984.500, "dur": 1.436, + "args": { + "External id": 979211,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937558987.786, "dur": 2.280, + "args": { + "External id": 979212,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937558992.239, "dur": 0.529, + "args": { + "External id": 979213,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937558994.407, "dur": 0.613, + "args": { + "External id": 979214,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937558999.152, "dur": 0.428, + "args": { + "External id": 979215,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937559001.197, "dur": 2.749, + "args": { + "External id": 979216,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937559005.802, "dur": 0.313, + "args": { + "External id": 979217,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937559028.658, "dur": 0.739, + "args": { + "External id": 979218,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937559032.787, "dur": 0.435, + "args": { + "External id": 979219,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937559091.218, "dur": 59.048, + "args": { + "External id": 979220,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 1811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345937559194.366, "dur": 146.747, + "args": { + "External id": 979221,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 1812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937559209.394, "dur": 4.916, + "args": { + "External id": 979222,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345937559220.460, "dur": 18.516, + "args": { + "External id": 979223,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 1814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345937559230.675, "dur": 7.812, + "args": { + "External id": 979224,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 1815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937559235.520, "dur": 0.855, + "args": { + "External id": 979225,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 1816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345937559247.095, "dur": 34.465, + "args": { + "External id": 979226,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 1817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937559249.991, "dur": 0.543, + "args": { + "External id": 979227,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937559254.203, "dur": 0.504, + "args": { + "External id": 979228,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937559256.511, "dur": 2.844, + "args": { + "External id": 979229,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937559261.100, "dur": 2.194, + "args": { + "External id": 979230,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937559264.699, "dur": 0.396, + "args": { + "External id": 979231,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937559266.926, "dur": 0.544, + "args": { + "External id": 979232,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937559270.415, "dur": 0.381, + "args": { + "External id": 979233,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937559272.525, "dur": 0.414, + "args": { + "External id": 979234,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937559274.617, "dur": 0.473, + "args": { + "External id": 979235,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937559297.575, "dur": 34.524, + "args": { + "External id": 979236,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 1827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345937559397.778, "dur": 145.062, + "args": { + "External id": 979237,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 1828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345937559434.899, "dur": 104.192, + "args": { + "External id": 979238,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1829, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345937559446.771, "dur": 87.489, + "args": { + "External id": 979239,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 1830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345937559564.415, "dur": 2.131, + "args": { + "External id": 979240,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1831, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937559658.851, "dur": 2092.684, + "args": { + "External id": 979241,"Sequence number": 10552292, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1832 + } + }, + { + "ph": "f", "id": 174, "pid": 2338710, "tid": 2379450, "ts": 6345937559658.851, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937559784.367, "dur": 125.455, + "args": { + "External id": 979242,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 1833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345937559957.385, "dur": 45.690, + "args": { + "External id": 979243,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 1834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345937560049.227, "dur": 112.140, + "args": { + "External id": 979244,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 1835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937560180.709, "dur": 37.777, + "args": { + "External id": 979245,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 1836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937560228.519, "dur": 37.997, + "args": { + "External id": 979246,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 1837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937560275.829, "dur": 32.607, + "args": { + "External id": 979247,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 1838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937560317.413, "dur": 33.398, + "args": { + "External id": 979248,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 1839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345937560386.674, "dur": 32.005, + "args": { + "External id": 979249,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 1840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345937560443.096, "dur": 33.579, + "args": { + "External id": 979250,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345937560502.605, "dur": 22.762, + "args": { + "External id": 979251,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 1842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345937560545.409, "dur": 20.405, + "args": { + "External id": 979252,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 1843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937560574.044, "dur": 44.170, + "args": { + "External id": 979253,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 1844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937560622.521, "dur": 36.636, + "args": { + "External id": 979254,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 1845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345937560695.402, "dur": 343.223, + "args": { + "External id": 979255,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937560790.571, "dur": 6.597, + "args": { + "External id": 979256,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937560799.431, "dur": 2.484, + "args": { + "External id": 979257,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937560803.426, "dur": 2.280, + "args": { + "External id": 979258,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937560807.025, "dur": 3.930, + "args": { + "External id": 979259,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937560859.114, "dur": 12.750, + "args": { + "External id": 979260,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937560861.020, "dur": 3.176, + "args": { + "External id": 979261,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345937560878.776, "dur": 48.651, + "args": { + "External id": 979262,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 1853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937560886.666, "dur": 5.339, + "args": { + "External id": 979263,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 1854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937560929.930, "dur": 2.730, + "args": { + "External id": 979264,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937560931.835, "dur": 0.652, + "args": { + "External id": 979265,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345937560933.950, "dur": 19.592, + "args": { + "External id": 979266,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 1857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937560938.264, "dur": 0.557, + "args": { + "External id": 979267,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 1858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345937561136.781, "dur": 40.528, + "args": { + "External id": 979268,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345937561200.641, "dur": 19.133, + "args": { + "External id": 979269,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937561229.838, "dur": 63.262, + "args": { + "External id": 979270,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 1861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937561300.666, "dur": 49.054, + "args": { + "External id": 979271,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 1862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937561361.958, "dur": 25.977, + "args": { + "External id": 979272,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 1863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937561393.274, "dur": 35.287, + "args": { + "External id": 979273,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 1864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937561435.596, "dur": 31.836, + "args": { + "External id": 979274,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 1865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937561474.592, "dur": 33.320, + "args": { + "External id": 979275,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 1866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345937561532.509, "dur": 30.400, + "args": { + "External id": 979276,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 1867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345937561582.294, "dur": 26.838, + "args": { + "External id": 979277,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345937561628.435, "dur": 20.045, + "args": { + "External id": 979278,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 1869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345937561668.961, "dur": 15.763, + "args": { + "External id": 979279,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 1870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345937561700.699, "dur": 16.691, + "args": { + "External id": 979280,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 1871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937561801.298, "dur": 16.923, + "args": { + "External id": 979281,"Record function id": 0, "Ev Idx": 1872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937561805.073, "dur": 12.265, + "args": { + "External id": 979282,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937561810.040, "dur": 6.221, + "args": { + "External id": 979283,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937561811.613, "dur": 4.488, + "args": { + "External id": 979284,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937561822.775, "dur": 5.666, + "args": { + "External id": 979285,"Record function id": 0, "Ev Idx": 1876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937561824.348, "dur": 3.345, + "args": { + "External id": 979286,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937561825.242, "dur": 1.910, + "args": { + "External id": 979287,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937561825.974, "dur": 1.071, + "args": { + "External id": 979288,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937561832.607, "dur": 7.755, + "args": { + "External id": 979289,"Record function id": 0, "Ev Idx": 1880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937561834.095, "dur": 5.744, + "args": { + "External id": 979290,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937561834.785, "dur": 4.361, + "args": { + "External id": 979291,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937561835.572, "dur": 3.421, + "args": { + "External id": 979292,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937561844.473, "dur": 5.024, + "args": { + "External id": 979293,"Record function id": 0, "Ev Idx": 1884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937561845.832, "dur": 3.143, + "args": { + "External id": 979294,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937561846.633, "dur": 1.810, + "args": { + "External id": 979295,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937561847.243, "dur": 1.107, + "args": { + "External id": 979296,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937561853.288, "dur": 4.398, + "args": { + "External id": 979297,"Record function id": 0, "Ev Idx": 1888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937561854.500, "dur": 2.703, + "args": { + "External id": 979298,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937561855.376, "dur": 1.306, + "args": { + "External id": 979299,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937561855.839, "dur": 0.753, + "args": { + "External id": 979300,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937561861.378, "dur": 4.941, + "args": { + "External id": 979301,"Record function id": 0, "Ev Idx": 1892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937561862.968, "dur": 2.844, + "args": { + "External id": 979302,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937561863.752, "dur": 1.549, + "args": { + "External id": 979303,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937561864.181, "dur": 1.018, + "args": { + "External id": 979304,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937561870.227, "dur": 13.105, + "args": { + "External id": 979305,"Record function id": 0, "Ev Idx": 1896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937561871.496, "dur": 11.156, + "args": { + "External id": 979306,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937561872.116, "dur": 10.035, + "args": { + "External id": 979307,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937561881.216, "dur": 0.800, + "args": { + "External id": 979308,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937561886.987, "dur": 4.604, + "args": { + "External id": 979309,"Record function id": 0, "Ev Idx": 1900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937561888.539, "dur": 2.559, + "args": { + "External id": 979310,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937561889.161, "dur": 1.468, + "args": { + "External id": 979311,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937561889.640, "dur": 0.888, + "args": { + "External id": 979312,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937561895.145, "dur": 4.450, + "args": { + "External id": 979313,"Record function id": 0, "Ev Idx": 1904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937561896.634, "dur": 2.455, + "args": { + "External id": 979314,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937561897.258, "dur": 1.366, + "args": { + "External id": 979315,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937561897.596, "dur": 0.912, + "args": { + "External id": 979316,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937561904.060, "dur": 78238.904, + "args": { + "External id": 979317,"Record function id": 0, "Sequence number": 10552291, "Fwd thread id": 1, "Ev Idx": 1908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937561905.608, "dur": 78224.711, + "args": { + "External id": 979318,"Sequence number": 10552291, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1909 + } + }, + { + "ph": "f", "id": 175, "pid": 2338710, "tid": 2379450, "ts": 6345937561905.608, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.24)", "pid": 2338710, "tid": 2379450, + "ts": 6345937561943.853, "dur": 45.425, + "args": { + "External id": 979319,"Record function id": 0, "Ev Idx": 1910 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.24)", "pid": 2338710, "tid": 2379450, + "ts": 6345937561998.722, "dur": 138.900, + "args": { + "External id": 979320,"Record function id": 0, "Ev Idx": 1911 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.24)", "pid": 2338710, "tid": 2379450, + "ts": 6345937562147.339, "dur": 77971.066, + "args": { + "External id": 979321,"Record function id": 0, "Ev Idx": 1912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937562259.697, "dur": 9.158, + "args": { + "External id": 979322,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937562281.876, "dur": 8.326, + "args": { + "External id": 979323,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 1914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345937562308.769, "dur": 76666.878, + "args": { + "External id": 979324,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 1915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345937562325.117, "dur": 76633.460, + "args": { + "External id": 979325,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 1916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937562444.227, "dur": 22.643, + "args": { + "External id": 979326,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345937562494.825, "dur": 76403.904, + "args": { + "External id": 979327,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 1918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345937562499.313, "dur": 76397.972, + "args": { + "External id": 979328,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 1919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937562504.831, "dur": 12.713, + "args": { + "External id": 979329,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937562521.248, "dur": 76368.816, + "args": { + "External id": 979330,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 1921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937639165.524, "dur": 16.634, + "args": { + "External id": 979331,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 1922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937639171.140, "dur": 10.264, + "args": { + "External id": 979332,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345937639224.265, "dur": 453.978, + "args": { + "External id": 979333,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 1924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345937639268.029, "dur": 403.415, + "args": { + "External id": 979334,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1925, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345937639286.187, "dur": 376.769, + "args": { + "External id": 979335,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 1926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345937639705.651, "dur": 2.798, + "args": { + "External id": 979336,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1927, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937639781.085, "dur": 8.654, + "args": { + "External id": 979337,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937639851.970, "dur": 1.421, + "args": { + "External id": 979338,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937639874.292, "dur": 4.695, + "args": { + "External id": 979339,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937639894.753, "dur": 1.283, + "args": { + "External id": 979340,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937639914.064, "dur": 1.138, + "args": { + "External id": 979341,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937639930.251, "dur": 0.886, + "args": { + "External id": 979342,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937639946.400, "dur": 3.933, + "args": { + "External id": 979343,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937639964.954, "dur": 2.903, + "args": { + "External id": 979344,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937639982.578, "dur": 0.686, + "args": { + "External id": 979345,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937640165.079, "dur": 3431.114, + "args": { + "External id": 979346,"Record function id": 0, "Ev Idx": 1937 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.23)", "pid": 2338710, "tid": 2379450, + "ts": 6345937640190.599, "dur": 1306.914, + "args": { + "External id": 979347,"Record function id": 0, "Ev Idx": 1938 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.23)", "pid": 2338710, "tid": 2379450, + "ts": 6345937640210.566, "dur": 414.845, + "args": { + "External id": 979348,"Record function id": 0, "Ev Idx": 1939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937640311.559, "dur": 5.784, + "args": { + "External id": 979349,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 1940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937640321.624, "dur": 1.068, + "args": { + "External id": 979350,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 1941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937640324.571, "dur": 2.984, + "args": { + "External id": 979351,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 1942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937640329.938, "dur": 1.003, + "args": { + "External id": 979352,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937640335.098, "dur": 0.687, + "args": { + "External id": 979353,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937640337.591, "dur": 1.236, + "args": { + "External id": 979354,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 1945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937640340.549, "dur": 3.104, + "args": { + "External id": 979355,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 1946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937640345.163, "dur": 0.839, + "args": { + "External id": 979356,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 1947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937640349.560, "dur": 1.053, + "args": { + "External id": 979357,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 1948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937640352.299, "dur": 1.144, + "args": { + "External id": 979358,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 1949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345937640388.570, "dur": 199.238, + "args": { + "External id": 979359,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 1950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345937640410.016, "dur": 172.904, + "args": { + "External id": 979360,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 1951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937640430.338, "dur": 19.491, + "args": { + "External id": 979361,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345937640455.351, "dur": 93.347, + "args": { + "External id": 979362,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 1953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345937640461.046, "dur": 87.141, + "args": { + "External id": 979363,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 1954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937640466.215, "dur": 7.458, + "args": { + "External id": 979364,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937640475.509, "dur": 71.704, + "args": { + "External id": 979365,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 1956 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.22", "pid": 2338710, "tid": 2379450, + "ts": 6345937640718.959, "dur": 768.851, + "args": { + "External id": 979366,"Record function id": 0, "Ev Idx": 1957 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.22)", "pid": 2338710, "tid": 2379450, + "ts": 6345937640739.203, "dur": 733.423, + "args": { + "External id": 979367,"Record function id": 0, "Ev Idx": 1958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937640803.489, "dur": 6.730, + "args": { + "External id": 979368,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345937640827.910, "dur": 42.999, + "args": { + "External id": 979369,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 1960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937640833.630, "dur": 1.854, + "args": { + "External id": 979370,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937640837.548, "dur": 2.548, + "args": { + "External id": 979371,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937640842.320, "dur": 0.689, + "args": { + "External id": 979372,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937640844.981, "dur": 0.425, + "args": { + "External id": 979373,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937640852.523, "dur": 0.614, + "args": { + "External id": 979374,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937640854.712, "dur": 2.790, + "args": { + "External id": 979375,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937640859.040, "dur": 0.357, + "args": { + "External id": 979376,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937640862.535, "dur": 0.278, + "args": { + "External id": 979377,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937640864.428, "dur": 0.354, + "args": { + "External id": 979378,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937640881.799, "dur": 49.560, + "args": { + "External id": 979379,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 1970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345937640968.124, "dur": 212.054, + "args": { + "External id": 979380,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 1971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937640980.047, "dur": 4.076, + "args": { + "External id": 979381,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345937640990.513, "dur": 11.889, + "args": { + "External id": 979382,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 1973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345937640995.568, "dur": 6.361, + "args": { + "External id": 979383,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 1974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937640999.765, "dur": 0.551, + "args": { + "External id": 979384,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 1975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345937641035.527, "dur": 81.781, + "args": { + "External id": 979385,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 1976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937641039.975, "dur": 0.684, + "args": { + "External id": 979386,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937641044.482, "dur": 0.461, + "args": { + "External id": 979387,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937641046.562, "dur": 2.506, + "args": { + "External id": 979388,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937641050.948, "dur": 43.468, + "args": { + "External id": 979389,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937641099.107, "dur": 0.671, + "args": { + "External id": 979390,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937641101.385, "dur": 0.498, + "args": { + "External id": 979391,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937641105.674, "dur": 0.575, + "args": { + "External id": 979392,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937641107.988, "dur": 0.339, + "args": { + "External id": 979393,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937641110.049, "dur": 0.579, + "args": { + "External id": 979394,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937641132.095, "dur": 37.726, + "args": { + "External id": 979395,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 1986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345937641239.023, "dur": 149.437, + "args": { + "External id": 979396,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 1987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345937641279.365, "dur": 104.731, + "args": { + "External id": 979397,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1988, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345937641291.212, "dur": 87.532, + "args": { + "External id": 979398,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 1989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345937641409.542, "dur": 1.996, + "args": { + "External id": 979399,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1990, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937641506.940, "dur": 2063.321, + "args": { + "External id": 979400,"Sequence number": 10552290, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1991 + } + }, + { + "ph": "f", "id": 176, "pid": 2338710, "tid": 2379450, "ts": 6345937641506.940, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937641632.925, "dur": 125.132, + "args": { + "External id": 979401,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 1992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345937641805.879, "dur": 45.220, + "args": { + "External id": 979402,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 1993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345937641870.405, "dur": 58.688, + "args": { + "External id": 979403,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 1994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937641943.029, "dur": 34.653, + "args": { + "External id": 979404,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 1995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937641984.740, "dur": 127.933, + "args": { + "External id": 979405,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 1996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937642127.672, "dur": 39.168, + "args": { + "External id": 979406,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 1997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937642174.742, "dur": 33.826, + "args": { + "External id": 979407,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 1998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345937642243.508, "dur": 30.694, + "args": { + "External id": 979408,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 1999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345937642296.800, "dur": 30.240, + "args": { + "External id": 979409,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345937642354.071, "dur": 23.014, + "args": { + "External id": 979410,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345937642391.687, "dur": 18.877, + "args": { + "External id": 979411,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937642418.747, "dur": 45.087, + "args": { + "External id": 979412,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937642467.913, "dur": 36.490, + "args": { + "External id": 979413,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345937642540.108, "dur": 316.785, + "args": { + "External id": 979414,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937642647.515, "dur": 9.795, + "args": { + "External id": 979415,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937642660.152, "dur": 2.695, + "args": { + "External id": 979416,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937642664.324, "dur": 1.572, + "args": { + "External id": 979417,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937642667.115, "dur": 3.060, + "args": { + "External id": 979418,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937642726.653, "dur": 9.626, + "args": { + "External id": 979419,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937642728.884, "dur": 7.184, + "args": { + "External id": 979420,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345937642739.054, "dur": 41.041, + "args": { + "External id": 979421,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937642745.659, "dur": 5.361, + "args": { + "External id": 979422,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937642782.154, "dur": 1.878, + "args": { + "External id": 979423,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937642783.258, "dur": 0.676, + "args": { + "External id": 979424,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345937642785.265, "dur": 19.724, + "args": { + "External id": 979425,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937642787.236, "dur": 0.498, + "args": { + "External id": 979426,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345937642897.806, "dur": 32.537, + "args": { + "External id": 979427,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345937642951.624, "dur": 18.259, + "args": { + "External id": 979428,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937642978.196, "dur": 69.852, + "args": { + "External id": 979429,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937643112.583, "dur": 54.960, + "args": { + "External id": 979430,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937643181.294, "dur": 25.106, + "args": { + "External id": 979431,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937643212.656, "dur": 34.181, + "args": { + "External id": 979432,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937643254.885, "dur": 30.260, + "args": { + "External id": 979433,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937643291.737, "dur": 32.100, + "args": { + "External id": 979434,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345937643350.520, "dur": 30.609, + "args": { + "External id": 979435,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 2026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345937643400.715, "dur": 26.402, + "args": { + "External id": 979436,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345937643446.017, "dur": 19.797, + "args": { + "External id": 979437,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345937643487.820, "dur": 15.039, + "args": { + "External id": 979438,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345937643519.171, "dur": 17.100, + "args": { + "External id": 979439,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 2030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937643622.161, "dur": 17.768, + "args": { + "External id": 979440,"Record function id": 0, "Ev Idx": 2031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937643625.956, "dur": 12.693, + "args": { + "External id": 979441,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937643630.739, "dur": 6.750, + "args": { + "External id": 979442,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937643632.784, "dur": 4.503, + "args": { + "External id": 979443,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937643644.594, "dur": 5.780, + "args": { + "External id": 979444,"Record function id": 0, "Ev Idx": 2035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937643646.147, "dur": 3.617, + "args": { + "External id": 979445,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937643647.120, "dur": 2.130, + "args": { + "External id": 979446,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937643648.253, "dur": 0.897, + "args": { + "External id": 979447,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937643654.402, "dur": 7.435, + "args": { + "External id": 979448,"Record function id": 0, "Ev Idx": 2039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937643655.683, "dur": 5.637, + "args": { + "External id": 979449,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937643656.300, "dur": 4.552, + "args": { + "External id": 979450,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937643657.176, "dur": 3.550, + "args": { + "External id": 979451,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937643665.678, "dur": 4.699, + "args": { + "External id": 979452,"Record function id": 0, "Ev Idx": 2043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937643667.266, "dur": 2.618, + "args": { + "External id": 979453,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937643667.931, "dur": 1.485, + "args": { + "External id": 979454,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937643668.407, "dur": 0.904, + "args": { + "External id": 979455,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937643674.009, "dur": 42.135, + "args": { + "External id": 979456,"Record function id": 0, "Ev Idx": 2047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937643712.973, "dur": 2.673, + "args": { + "External id": 979457,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937643713.664, "dur": 1.426, + "args": { + "External id": 979458,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937643714.211, "dur": 0.786, + "args": { + "External id": 979459,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937643719.845, "dur": 7.694, + "args": { + "External id": 979460,"Record function id": 0, "Ev Idx": 2051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937643722.373, "dur": 4.672, + "args": { + "External id": 979461,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937643722.991, "dur": 3.579, + "args": { + "External id": 979462,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937643725.599, "dur": 0.830, + "args": { + "External id": 979463,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937643732.629, "dur": 4.411, + "args": { + "External id": 979464,"Record function id": 0, "Ev Idx": 2055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937643734.178, "dur": 2.409, + "args": { + "External id": 979465,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937643734.748, "dur": 1.372, + "args": { + "External id": 979466,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937643735.273, "dur": 0.743, + "args": { + "External id": 979467,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937643740.761, "dur": 4.877, + "args": { + "External id": 979468,"Record function id": 0, "Ev Idx": 2059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937643742.429, "dur": 2.738, + "args": { + "External id": 979469,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937643743.526, "dur": 1.163, + "args": { + "External id": 979470,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937643743.846, "dur": 0.755, + "args": { + "External id": 979471,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937643749.318, "dur": 4.685, + "args": { + "External id": 979472,"Record function id": 0, "Ev Idx": 2063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937643750.864, "dur": 2.663, + "args": { + "External id": 979473,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937643751.527, "dur": 1.540, + "args": { + "External id": 979474,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937643752.129, "dur": 0.848, + "args": { + "External id": 979475,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937643758.714, "dur": 72750.701, + "args": { + "External id": 979476,"Record function id": 0, "Sequence number": 10552289, "Fwd thread id": 1, "Ev Idx": 2067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937643762.552, "dur": 72735.653, + "args": { + "External id": 979477,"Sequence number": 10552289, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2068 + } + }, + { + "ph": "f", "id": 177, "pid": 2338710, "tid": 2379450, "ts": 6345937643762.552, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.23)", "pid": 2338710, "tid": 2379450, + "ts": 6345937643801.922, "dur": 47.084, + "args": { + "External id": 979478,"Record function id": 0, "Ev Idx": 2069 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.23)", "pid": 2338710, "tid": 2379450, + "ts": 6345937643858.493, "dur": 76.224, + "args": { + "External id": 979479,"Record function id": 0, "Ev Idx": 2070 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.23)", "pid": 2338710, "tid": 2379450, + "ts": 6345937643941.928, "dur": 72545.814, + "args": { + "External id": 979480,"Record function id": 0, "Ev Idx": 2071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937644123.217, "dur": 9.937, + "args": { + "External id": 979481,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937644147.879, "dur": 7.933, + "args": { + "External id": 979482,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345937644178.225, "dur": 71215.605, + "args": { + "External id": 979483,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345937644194.833, "dur": 71181.530, + "args": { + "External id": 979484,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937644299.986, "dur": 21.934, + "args": { + "External id": 979485,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345937644358.547, "dur": 70965.623, + "args": { + "External id": 979486,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 2077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345937644362.881, "dur": 70959.204, + "args": { + "External id": 979487,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 2078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937644369.701, "dur": 18.015, + "args": { + "External id": 979488,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937644390.320, "dur": 70928.635, + "args": { + "External id": 979489,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 2080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937715536.384, "dur": 16.813, + "args": { + "External id": 979490,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 2081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937715541.768, "dur": 10.909, + "args": { + "External id": 979491,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345937715595.217, "dur": 495.280, + "args": { + "External id": 979492,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 2083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345937715638.907, "dur": 406.562, + "args": { + "External id": 979493,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2084, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345937715655.031, "dur": 381.818, + "args": { + "External id": 979494,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 2085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345937716126.559, "dur": 3.244, + "args": { + "External id": 979495,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2086, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937716208.593, "dur": 9.013, + "args": { + "External id": 979496,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937716279.319, "dur": 1.557, + "args": { + "External id": 979497,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937716302.275, "dur": 4.322, + "args": { + "External id": 979498,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937716321.949, "dur": 1.290, + "args": { + "External id": 979499,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937716339.943, "dur": 1.011, + "args": { + "External id": 979500,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937716355.306, "dur": 1.010, + "args": { + "External id": 979501,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937716370.795, "dur": 4.279, + "args": { + "External id": 979502,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937716388.389, "dur": 3.046, + "args": { + "External id": 979503,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937716408.434, "dur": 1.046, + "args": { + "External id": 979504,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937716529.127, "dur": 3374.063, + "args": { + "External id": 979505,"Record function id": 0, "Ev Idx": 2096 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.22)", "pid": 2338710, "tid": 2379450, + "ts": 6345937716552.674, "dur": 1279.570, + "args": { + "External id": 979506,"Record function id": 0, "Ev Idx": 2097 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.22)", "pid": 2338710, "tid": 2379450, + "ts": 6345937716571.223, "dur": 389.151, + "args": { + "External id": 979507,"Record function id": 0, "Ev Idx": 2098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937716670.658, "dur": 4.992, + "args": { + "External id": 979508,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937716679.645, "dur": 4.389, + "args": { + "External id": 979509,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937716686.539, "dur": 3.569, + "args": { + "External id": 979510,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937716692.518, "dur": 0.938, + "args": { + "External id": 979511,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937716695.230, "dur": 0.674, + "args": { + "External id": 979512,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937716697.904, "dur": 0.958, + "args": { + "External id": 979513,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937716700.773, "dur": 2.948, + "args": { + "External id": 979514,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937716707.465, "dur": 1.123, + "args": { + "External id": 979515,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937716710.436, "dur": 0.979, + "args": { + "External id": 979516,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937716713.031, "dur": 1.081, + "args": { + "External id": 979517,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345937716734.227, "dur": 190.197, + "args": { + "External id": 979518,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345937716758.195, "dur": 160.864, + "args": { + "External id": 979519,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937716784.421, "dur": 20.753, + "args": { + "External id": 979520,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345937716810.822, "dur": 76.133, + "args": { + "External id": 979521,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 2112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345937716815.503, "dur": 71.050, + "args": { + "External id": 979522,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 2113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937716820.510, "dur": 6.828, + "args": { + "External id": 979523,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937716829.368, "dur": 56.423, + "args": { + "External id": 979524,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 2115 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.21", "pid": 2338710, "tid": 2379450, + "ts": 6345937717126.103, "dur": 696.087, + "args": { + "External id": 979525,"Record function id": 0, "Ev Idx": 2116 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.21)", "pid": 2338710, "tid": 2379450, + "ts": 6345937717148.302, "dur": 658.977, + "args": { + "External id": 979526,"Record function id": 0, "Ev Idx": 2117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937717218.149, "dur": 8.547, + "args": { + "External id": 979527,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345937717244.503, "dur": 39.625, + "args": { + "External id": 979528,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937717250.822, "dur": 2.214, + "args": { + "External id": 979529,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937717257.534, "dur": 0.415, + "args": { + "External id": 979530,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937717259.745, "dur": 0.373, + "args": { + "External id": 979531,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937717261.769, "dur": 0.682, + "args": { + "External id": 979532,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937717265.813, "dur": 0.677, + "args": { + "External id": 979533,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937717268.042, "dur": 2.798, + "args": { + "External id": 979534,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937717272.572, "dur": 1.932, + "args": { + "External id": 979535,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937717276.275, "dur": 0.262, + "args": { + "External id": 979536,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937717278.011, "dur": 0.311, + "args": { + "External id": 979537,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937717296.509, "dur": 56.063, + "args": { + "External id": 979538,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345937717390.986, "dur": 136.752, + "args": { + "External id": 979539,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 2130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937717403.449, "dur": 4.263, + "args": { + "External id": 979540,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345937717414.185, "dur": 11.893, + "args": { + "External id": 979541,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345937717418.930, "dur": 6.666, + "args": { + "External id": 979542,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 2133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937717423.460, "dur": 0.684, + "args": { + "External id": 979543,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345937717434.128, "dur": 34.802, + "args": { + "External id": 979544,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937717436.703, "dur": 1.912, + "args": { + "External id": 979545,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937717440.251, "dur": 0.564, + "args": { + "External id": 979546,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937717442.503, "dur": 3.660, + "args": { + "External id": 979547,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937717450.590, "dur": 0.535, + "args": { + "External id": 979548,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937717452.483, "dur": 0.625, + "args": { + "External id": 979549,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937717454.492, "dur": 0.426, + "args": { + "External id": 979550,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937717458.238, "dur": 0.376, + "args": { + "External id": 979551,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937717459.938, "dur": 0.442, + "args": { + "External id": 979552,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937717461.821, "dur": 1.646, + "args": { + "External id": 979553,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937717482.270, "dur": 36.198, + "args": { + "External id": 979554,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345937717579.697, "dur": 146.933, + "args": { + "External id": 979555,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 2146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345937717614.483, "dur": 107.823, + "args": { + "External id": 979556,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2147, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345937717626.720, "dur": 90.656, + "args": { + "External id": 979557,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 2148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345937717746.417, "dur": 2.083, + "args": { + "External id": 979558,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2149, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937717840.331, "dur": 2038.469, + "args": { + "External id": 979559,"Sequence number": 10552288, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2150 + } + }, + { + "ph": "f", "id": 178, "pid": 2338710, "tid": 2379450, "ts": 6345937717840.331, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937717959.314, "dur": 192.729, + "args": { + "External id": 979560,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 2151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345937718206.599, "dur": 45.835, + "args": { + "External id": 979561,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 2152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345937718272.710, "dur": 62.079, + "args": { + "External id": 979562,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 2153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937718361.828, "dur": 36.916, + "args": { + "External id": 979563,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937718406.396, "dur": 36.028, + "args": { + "External id": 979564,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937718449.710, "dur": 30.214, + "args": { + "External id": 979565,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937718488.932, "dur": 31.605, + "args": { + "External id": 979566,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345937718554.440, "dur": 26.786, + "args": { + "External id": 979567,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 2158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345937718606.277, "dur": 32.073, + "args": { + "External id": 979568,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345937718662.575, "dur": 26.698, + "args": { + "External id": 979569,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345937718704.974, "dur": 16.966, + "args": { + "External id": 979570,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937718732.634, "dur": 41.938, + "args": { + "External id": 979571,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937718778.154, "dur": 36.680, + "args": { + "External id": 979572,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345937718847.555, "dur": 394.081, + "args": { + "External id": 979573,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937718940.537, "dur": 6.937, + "args": { + "External id": 979574,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937718949.784, "dur": 3.894, + "args": { + "External id": 979575,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937718955.154, "dur": 3.403, + "args": { + "External id": 979576,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937718959.804, "dur": 1.741, + "args": { + "External id": 979577,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937719089.184, "dur": 12.459, + "args": { + "External id": 979578,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937719096.171, "dur": 4.333, + "args": { + "External id": 979579,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345937719104.304, "dur": 42.391, + "args": { + "External id": 979580,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937719111.555, "dur": 4.856, + "args": { + "External id": 979581,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937719148.393, "dur": 1.875, + "args": { + "External id": 979582,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937719149.647, "dur": 0.474, + "args": { + "External id": 979583,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345937719151.462, "dur": 19.733, + "args": { + "External id": 979584,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937719153.831, "dur": 0.768, + "args": { + "External id": 979585,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345937719288.186, "dur": 33.469, + "args": { + "External id": 979586,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345937719343.955, "dur": 18.309, + "args": { + "External id": 979587,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937719371.314, "dur": 57.615, + "args": { + "External id": 979588,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937719435.951, "dur": 42.817, + "args": { + "External id": 979589,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937719490.133, "dur": 24.147, + "args": { + "External id": 979590,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937719520.663, "dur": 33.022, + "args": { + "External id": 979591,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937719561.275, "dur": 31.057, + "args": { + "External id": 979592,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937719600.867, "dur": 33.312, + "args": { + "External id": 979593,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345937719658.254, "dur": 27.819, + "args": { + "External id": 979594,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 2185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345937719706.003, "dur": 26.378, + "args": { + "External id": 979595,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345937719750.093, "dur": 20.626, + "args": { + "External id": 979596,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345937719791.296, "dur": 17.180, + "args": { + "External id": 979597,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345937719826.237, "dur": 18.310, + "args": { + "External id": 979598,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 2189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937719928.667, "dur": 17.017, + "args": { + "External id": 979599,"Record function id": 0, "Ev Idx": 2190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937719932.940, "dur": 11.608, + "args": { + "External id": 979600,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937719937.759, "dur": 5.785, + "args": { + "External id": 979601,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937719939.232, "dur": 4.173, + "args": { + "External id": 979602,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937719950.151, "dur": 5.026, + "args": { + "External id": 979603,"Record function id": 0, "Ev Idx": 2194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937719951.791, "dur": 2.770, + "args": { + "External id": 979604,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937719952.725, "dur": 1.332, + "args": { + "External id": 979605,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937719953.184, "dur": 0.767, + "args": { + "External id": 979606,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937719959.024, "dur": 7.768, + "args": { + "External id": 979607,"Record function id": 0, "Ev Idx": 2198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937719960.384, "dur": 5.790, + "args": { + "External id": 979608,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937719961.230, "dur": 4.487, + "args": { + "External id": 979609,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937719962.206, "dur": 3.383, + "args": { + "External id": 979610,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937719970.571, "dur": 4.728, + "args": { + "External id": 979611,"Record function id": 0, "Ev Idx": 2202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937719972.189, "dur": 2.655, + "args": { + "External id": 979612,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937719972.820, "dur": 1.567, + "args": { + "External id": 979613,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937719973.257, "dur": 1.019, + "args": { + "External id": 979614,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937719978.927, "dur": 4.528, + "args": { + "External id": 979615,"Record function id": 0, "Ev Idx": 2206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937719980.477, "dur": 2.482, + "args": { + "External id": 979616,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937719981.148, "dur": 1.351, + "args": { + "External id": 979617,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937719981.644, "dur": 0.775, + "args": { + "External id": 979618,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937719987.104, "dur": 4.339, + "args": { + "External id": 979619,"Record function id": 0, "Ev Idx": 2210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937719988.623, "dur": 2.343, + "args": { + "External id": 979620,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937719989.215, "dur": 1.302, + "args": { + "External id": 979621,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937719989.696, "dur": 0.737, + "args": { + "External id": 979622,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937719995.189, "dur": 4.199, + "args": { + "External id": 979623,"Record function id": 0, "Ev Idx": 2214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937719996.630, "dur": 2.302, + "args": { + "External id": 979624,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937719997.331, "dur": 1.153, + "args": { + "External id": 979625,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937719997.631, "dur": 0.768, + "args": { + "External id": 979626,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937720003.010, "dur": 28.731, + "args": { + "External id": 979627,"Record function id": 0, "Ev Idx": 2218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937720004.671, "dur": 25.852, + "args": { + "External id": 979628,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937720005.461, "dur": 23.910, + "args": { + "External id": 979629,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937720027.298, "dur": 1.738, + "args": { + "External id": 979630,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937720039.495, "dur": 6.128, + "args": { + "External id": 979631,"Record function id": 0, "Ev Idx": 2222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937720041.799, "dur": 3.340, + "args": { + "External id": 979632,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937720043.040, "dur": 1.640, + "args": { + "External id": 979633,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937720043.619, "dur": 0.950, + "args": { + "External id": 979634,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937720050.471, "dur": 71054.094, + "args": { + "External id": 979635,"Record function id": 0, "Sequence number": 10552287, "Fwd thread id": 1, "Ev Idx": 2226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937720088.407, "dur": 71003.013, + "args": { + "External id": 979636,"Sequence number": 10552287, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2227 + } + }, + { + "ph": "f", "id": 179, "pid": 2338710, "tid": 2379450, "ts": 6345937720088.407, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.22)", "pid": 2338710, "tid": 2379450, + "ts": 6345937720126.866, "dur": 47.176, + "args": { + "External id": 979637,"Record function id": 0, "Ev Idx": 2228 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.22)", "pid": 2338710, "tid": 2379450, + "ts": 6345937720183.493, "dur": 70.899, + "args": { + "External id": 979638,"Record function id": 0, "Ev Idx": 2229 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.22)", "pid": 2338710, "tid": 2379450, + "ts": 6345937720260.890, "dur": 70784.766, + "args": { + "External id": 979639,"Record function id": 0, "Ev Idx": 2230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937720365.669, "dur": 9.028, + "args": { + "External id": 979640,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937720387.205, "dur": 8.086, + "args": { + "External id": 979641,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345937720416.749, "dur": 69539.120, + "args": { + "External id": 979642,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345937720434.583, "dur": 69504.574, + "args": { + "External id": 979643,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937720540.353, "dur": 21.667, + "args": { + "External id": 979644,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345937720585.940, "dur": 69294.612, + "args": { + "External id": 979645,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 2236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345937720589.990, "dur": 69289.228, + "args": { + "External id": 979646,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 2237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937720595.479, "dur": 12.011, + "args": { + "External id": 979647,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937720610.155, "dur": 69262.627, + "args": { + "External id": 979648,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 2239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937790130.954, "dur": 17.919, + "args": { + "External id": 979649,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 2240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937790136.591, "dur": 11.520, + "args": { + "External id": 979650,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345937790184.241, "dur": 479.390, + "args": { + "External id": 979651,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 2242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345937790223.550, "dur": 433.498, + "args": { + "External id": 979652,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2243, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345937790239.767, "dur": 410.658, + "args": { + "External id": 979653,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 2244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345937790691.457, "dur": 2.579, + "args": { + "External id": 979654,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2245, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937790764.821, "dur": 8.121, + "args": { + "External id": 979655,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937790828.124, "dur": 1.736, + "args": { + "External id": 979656,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937790849.715, "dur": 4.519, + "args": { + "External id": 979657,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937790870.858, "dur": 1.011, + "args": { + "External id": 979658,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937790885.473, "dur": 1.093, + "args": { + "External id": 979659,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937790899.210, "dur": 1.242, + "args": { + "External id": 979660,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937790912.182, "dur": 3.682, + "args": { + "External id": 979661,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937790928.699, "dur": 3.157, + "args": { + "External id": 979662,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937790947.063, "dur": 0.719, + "args": { + "External id": 979663,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937791125.012, "dur": 3343.493, + "args": { + "External id": 979664,"Record function id": 0, "Ev Idx": 2255 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.21)", "pid": 2338710, "tid": 2379450, + "ts": 6345937791149.931, "dur": 1244.676, + "args": { + "External id": 979665,"Record function id": 0, "Ev Idx": 2256 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.21)", "pid": 2338710, "tid": 2379450, + "ts": 6345937791166.537, "dur": 393.589, + "args": { + "External id": 979666,"Record function id": 0, "Ev Idx": 2257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937791260.713, "dur": 6.120, + "args": { + "External id": 979667,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937791270.814, "dur": 1.325, + "args": { + "External id": 979668,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937791274.390, "dur": 3.716, + "args": { + "External id": 979669,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937791280.232, "dur": 1.014, + "args": { + "External id": 979670,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937791283.453, "dur": 0.837, + "args": { + "External id": 979671,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937791286.024, "dur": 0.752, + "args": { + "External id": 979672,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937791288.435, "dur": 2.565, + "args": { + "External id": 979673,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937791294.878, "dur": 0.967, + "args": { + "External id": 979674,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937791297.663, "dur": 0.591, + "args": { + "External id": 979675,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937791299.868, "dur": 1.246, + "args": { + "External id": 979676,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345937791321.063, "dur": 201.765, + "args": { + "External id": 979677,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345937791341.619, "dur": 174.865, + "args": { + "External id": 979678,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937791370.916, "dur": 20.715, + "args": { + "External id": 979679,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345937791397.442, "dur": 85.320, + "args": { + "External id": 979680,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 2271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345937791400.633, "dur": 81.730, + "args": { + "External id": 979681,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 2272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937791407.606, "dur": 7.407, + "args": { + "External id": 979682,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937791417.027, "dur": 64.775, + "args": { + "External id": 979683,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 2274 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.20", "pid": 2338710, "tid": 2379450, + "ts": 6345937791652.551, "dur": 732.867, + "args": { + "External id": 979684,"Record function id": 0, "Ev Idx": 2275 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.20)", "pid": 2338710, "tid": 2379450, + "ts": 6345937791670.919, "dur": 700.218, + "args": { + "External id": 979685,"Record function id": 0, "Ev Idx": 2276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937791733.832, "dur": 6.562, + "args": { + "External id": 979686,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345937791758.032, "dur": 39.763, + "args": { + "External id": 979687,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937791763.939, "dur": 1.876, + "args": { + "External id": 979688,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937791768.397, "dur": 2.283, + "args": { + "External id": 979689,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937791772.338, "dur": 0.721, + "args": { + "External id": 979690,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937791774.821, "dur": 0.896, + "args": { + "External id": 979691,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937791779.033, "dur": 0.634, + "args": { + "External id": 979692,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937791781.339, "dur": 2.920, + "args": { + "External id": 979693,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937791785.883, "dur": 0.814, + "args": { + "External id": 979694,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937791789.804, "dur": 0.390, + "args": { + "External id": 979695,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937791791.571, "dur": 0.322, + "args": { + "External id": 979696,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937791809.037, "dur": 46.900, + "args": { + "External id": 979697,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345937791892.713, "dur": 145.865, + "args": { + "External id": 979698,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 2289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937791903.866, "dur": 3.985, + "args": { + "External id": 979699,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345937791914.499, "dur": 11.526, + "args": { + "External id": 979700,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345937791919.300, "dur": 6.227, + "args": { + "External id": 979701,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 2292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937791923.450, "dur": 0.675, + "args": { + "External id": 979702,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345937791933.977, "dur": 30.421, + "args": { + "External id": 979703,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937791936.313, "dur": 0.663, + "args": { + "External id": 979704,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937791939.872, "dur": 0.465, + "args": { + "External id": 979705,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937791941.798, "dur": 2.572, + "args": { + "External id": 979706,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937791946.063, "dur": 2.031, + "args": { + "External id": 979707,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937791949.865, "dur": 0.470, + "args": { + "External id": 979708,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937791952.284, "dur": 0.274, + "args": { + "External id": 979709,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937791956.290, "dur": 0.459, + "args": { + "External id": 979710,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937791958.192, "dur": 0.280, + "args": { + "External id": 979711,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937791959.889, "dur": 0.390, + "args": { + "External id": 979712,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937791975.405, "dur": 50.432, + "args": { + "External id": 979713,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345937792134.844, "dur": 150.878, + "args": { + "External id": 979714,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 2305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345937792170.859, "dur": 110.465, + "args": { + "External id": 979715,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2306, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345937792183.769, "dur": 92.204, + "args": { + "External id": 979716,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 2307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345937792305.899, "dur": 2.261, + "args": { + "External id": 979717,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2308, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937792403.003, "dur": 2040.314, + "args": { + "External id": 979718,"Sequence number": 10552286, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2309 + } + }, + { + "ph": "f", "id": 180, "pid": 2338710, "tid": 2379450, "ts": 6345937792403.003, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937792528.553, "dur": 127.240, + "args": { + "External id": 979719,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 2310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345937792702.150, "dur": 45.426, + "args": { + "External id": 979720,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 2311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345937792766.704, "dur": 57.403, + "args": { + "External id": 979721,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 2312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937792837.342, "dur": 34.858, + "args": { + "External id": 979722,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937792879.072, "dur": 37.081, + "args": { + "External id": 979723,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937792923.075, "dur": 31.504, + "args": { + "External id": 979724,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937792962.173, "dur": 33.157, + "args": { + "External id": 979725,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345937793049.974, "dur": 72.233, + "args": { + "External id": 979726,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 2317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345937793145.367, "dur": 32.575, + "args": { + "External id": 979727,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345937793207.702, "dur": 23.164, + "args": { + "External id": 979728,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345937793245.948, "dur": 19.562, + "args": { + "External id": 979729,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937793273.799, "dur": 47.825, + "args": { + "External id": 979730,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937793325.527, "dur": 36.099, + "args": { + "External id": 979731,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345937793397.852, "dur": 321.076, + "args": { + "External id": 979732,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937793488.103, "dur": 7.109, + "args": { + "External id": 979733,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937793497.469, "dur": 2.451, + "args": { + "External id": 979734,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937793501.583, "dur": 3.038, + "args": { + "External id": 979735,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937793505.985, "dur": 2.243, + "args": { + "External id": 979736,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937793556.678, "dur": 8.336, + "args": { + "External id": 979737,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937793561.671, "dur": 3.124, + "args": { + "External id": 979738,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345937793578.154, "dur": 52.382, + "args": { + "External id": 979739,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937793587.032, "dur": 6.293, + "args": { + "External id": 979740,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937793633.692, "dur": 2.256, + "args": { + "External id": 979741,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937793634.901, "dur": 0.955, + "args": { + "External id": 979742,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345937793637.492, "dur": 17.572, + "args": { + "External id": 979743,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937793639.816, "dur": 0.730, + "args": { + "External id": 979744,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345937793763.828, "dur": 34.055, + "args": { + "External id": 979745,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345937793819.158, "dur": 18.288, + "args": { + "External id": 979746,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937793845.616, "dur": 47.303, + "args": { + "External id": 979747,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937793899.646, "dur": 42.522, + "args": { + "External id": 979748,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937793952.234, "dur": 24.813, + "args": { + "External id": 979749,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937793983.344, "dur": 55.130, + "args": { + "External id": 979750,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937794051.662, "dur": 90.746, + "args": { + "External id": 979751,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937794153.682, "dur": 33.773, + "args": { + "External id": 979752,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345937794213.281, "dur": 29.959, + "args": { + "External id": 979753,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 2344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345937794272.929, "dur": 28.096, + "args": { + "External id": 979754,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345937794320.748, "dur": 19.451, + "args": { + "External id": 979755,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345937794358.380, "dur": 15.935, + "args": { + "External id": 979756,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345937794392.763, "dur": 17.139, + "args": { + "External id": 979757,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 2348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937794495.909, "dur": 19.667, + "args": { + "External id": 979758,"Record function id": 0, "Ev Idx": 2349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937794499.956, "dur": 14.595, + "args": { + "External id": 979759,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937794504.822, "dur": 8.809, + "args": { + "External id": 979760,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937794506.745, "dur": 6.765, + "args": { + "External id": 979761,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937794520.198, "dur": 5.679, + "args": { + "External id": 979762,"Record function id": 0, "Ev Idx": 2353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937794521.929, "dur": 3.323, + "args": { + "External id": 979763,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937794522.878, "dur": 1.860, + "args": { + "External id": 979764,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937794523.464, "dur": 1.180, + "args": { + "External id": 979765,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937794529.732, "dur": 7.786, + "args": { + "External id": 979766,"Record function id": 0, "Ev Idx": 2357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937794531.360, "dur": 5.647, + "args": { + "External id": 979767,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937794532.285, "dur": 4.239, + "args": { + "External id": 979768,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937794533.330, "dur": 3.080, + "args": { + "External id": 979769,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937794541.323, "dur": 4.782, + "args": { + "External id": 979770,"Record function id": 0, "Ev Idx": 2361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937794542.849, "dur": 2.786, + "args": { + "External id": 979771,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937794543.705, "dur": 1.453, + "args": { + "External id": 979772,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937794544.170, "dur": 0.883, + "args": { + "External id": 979773,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937794549.879, "dur": 4.738, + "args": { + "External id": 979774,"Record function id": 0, "Ev Idx": 2365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937794551.453, "dur": 2.641, + "args": { + "External id": 979775,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937794552.342, "dur": 1.267, + "args": { + "External id": 979776,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937794552.697, "dur": 0.820, + "args": { + "External id": 979777,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937794558.298, "dur": 5.347, + "args": { + "External id": 979778,"Record function id": 0, "Ev Idx": 2369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937794559.670, "dur": 3.508, + "args": { + "External id": 979779,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937794560.571, "dur": 2.104, + "args": { + "External id": 979780,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937794561.423, "dur": 1.169, + "args": { + "External id": 979781,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937794567.465, "dur": 4.716, + "args": { + "External id": 979782,"Record function id": 0, "Ev Idx": 2373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937794569.021, "dur": 2.691, + "args": { + "External id": 979783,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937794569.784, "dur": 1.410, + "args": { + "External id": 979784,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937794570.312, "dur": 0.798, + "args": { + "External id": 979785,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937794576.089, "dur": 6.555, + "args": { + "External id": 979786,"Record function id": 0, "Ev Idx": 2377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937794577.634, "dur": 4.507, + "args": { + "External id": 979787,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937794578.203, "dur": 3.455, + "args": { + "External id": 979788,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937794580.749, "dur": 0.786, + "args": { + "External id": 979789,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937794587.816, "dur": 4.429, + "args": { + "External id": 979790,"Record function id": 0, "Ev Idx": 2381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937794588.915, "dur": 2.853, + "args": { + "External id": 979791,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937794589.701, "dur": 1.587, + "args": { + "External id": 979792,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937794590.493, "dur": 0.686, + "args": { + "External id": 979793,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937794596.400, "dur": 64946.742, + "args": { + "External id": 979794,"Record function id": 0, "Sequence number": 10552285, "Fwd thread id": 1, "Ev Idx": 2385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937794598.037, "dur": 64934.690, + "args": { + "External id": 979795,"Sequence number": 10552285, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2386 + } + }, + { + "ph": "f", "id": 181, "pid": 2338710, "tid": 2379450, "ts": 6345937794598.037, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.21)", "pid": 2338710, "tid": 2379450, + "ts": 6345937794635.479, "dur": 45.165, + "args": { + "External id": 979796,"Record function id": 0, "Ev Idx": 2387 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.21)", "pid": 2338710, "tid": 2379450, + "ts": 6345937794690.844, "dur": 71.405, + "args": { + "External id": 979797,"Record function id": 0, "Ev Idx": 2388 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.21)", "pid": 2338710, "tid": 2379450, + "ts": 6345937794769.140, "dur": 64753.222, + "args": { + "External id": 979798,"Record function id": 0, "Ev Idx": 2389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937794873.303, "dur": 7.823, + "args": { + "External id": 979799,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937794893.073, "dur": 7.994, + "args": { + "External id": 979800,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345937794917.937, "dur": 63518.950, + "args": { + "External id": 979801,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345937794937.486, "dur": 63481.862, + "args": { + "External id": 979802,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937795104.017, "dur": 25.964, + "args": { + "External id": 979803,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345937795156.026, "dur": 63209.341, + "args": { + "External id": 979804,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 2395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345937795163.338, "dur": 63200.548, + "args": { + "External id": 979805,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 2396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937795169.032, "dur": 15.272, + "args": { + "External id": 979806,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937795188.089, "dur": 63169.541, + "args": { + "External id": 979807,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 2398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937858577.123, "dur": 16.937, + "args": { + "External id": 979808,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 2399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937858582.571, "dur": 11.003, + "args": { + "External id": 979809,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345937858632.282, "dur": 495.252, + "args": { + "External id": 979810,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 2401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345937858676.674, "dur": 443.299, + "args": { + "External id": 979811,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2402, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345937858694.931, "dur": 416.544, + "args": { + "External id": 979812,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 2403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345937859170.249, "dur": 2.833, + "args": { + "External id": 979813,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2404, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937859250.020, "dur": 9.363, + "args": { + "External id": 979814,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937859317.962, "dur": 1.964, + "args": { + "External id": 979815,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937859341.084, "dur": 4.916, + "args": { + "External id": 979816,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937859360.686, "dur": 1.204, + "args": { + "External id": 979817,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937859378.403, "dur": 0.872, + "args": { + "External id": 979818,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937859393.717, "dur": 1.170, + "args": { + "External id": 979819,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937859409.646, "dur": 3.231, + "args": { + "External id": 979820,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937859427.613, "dur": 3.049, + "args": { + "External id": 979821,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937859445.734, "dur": 1.296, + "args": { + "External id": 979822,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937859561.741, "dur": 3421.161, + "args": { + "External id": 979823,"Record function id": 0, "Ev Idx": 2414 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.20)", "pid": 2338710, "tid": 2379450, + "ts": 6345937859585.006, "dur": 1292.813, + "args": { + "External id": 979824,"Record function id": 0, "Ev Idx": 2415 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.20)", "pid": 2338710, "tid": 2379450, + "ts": 6345937859606.966, "dur": 395.960, + "args": { + "External id": 979825,"Record function id": 0, "Ev Idx": 2416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937859707.024, "dur": 6.167, + "args": { + "External id": 979826,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937859716.824, "dur": 1.202, + "args": { + "External id": 979827,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937859720.122, "dur": 3.204, + "args": { + "External id": 979828,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937859725.359, "dur": 0.861, + "args": { + "External id": 979829,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937859727.966, "dur": 0.987, + "args": { + "External id": 979830,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937859730.715, "dur": 0.931, + "args": { + "External id": 979831,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937859733.915, "dur": 2.852, + "args": { + "External id": 979832,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937859740.304, "dur": 0.917, + "args": { + "External id": 979833,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937859742.834, "dur": 1.065, + "args": { + "External id": 979834,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937859745.719, "dur": 1.035, + "args": { + "External id": 979835,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345937859767.153, "dur": 197.373, + "args": { + "External id": 979836,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345937859787.609, "dur": 170.496, + "args": { + "External id": 979837,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937859813.206, "dur": 20.657, + "args": { + "External id": 979838,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345937859839.406, "dur": 83.323, + "args": { + "External id": 979839,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 2430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345937859842.741, "dur": 79.582, + "args": { + "External id": 979840,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 2431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937859848.295, "dur": 7.631, + "args": { + "External id": 979841,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937859858.214, "dur": 63.313, + "args": { + "External id": 979842,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 2433 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.19", "pid": 2338710, "tid": 2379450, + "ts": 6345937860166.419, "dur": 701.703, + "args": { + "External id": 979843,"Record function id": 0, "Ev Idx": 2434 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.19)", "pid": 2338710, "tid": 2379450, + "ts": 6345937860187.220, "dur": 666.929, + "args": { + "External id": 979844,"Record function id": 0, "Ev Idx": 2435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937860254.594, "dur": 8.299, + "args": { + "External id": 979845,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345937860280.692, "dur": 41.028, + "args": { + "External id": 979846,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937860287.045, "dur": 2.998, + "args": { + "External id": 979847,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937860292.378, "dur": 0.627, + "args": { + "External id": 979848,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937860294.571, "dur": 0.468, + "args": { + "External id": 979849,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937860298.651, "dur": 0.390, + "args": { + "External id": 979850,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937860300.573, "dur": 0.587, + "args": { + "External id": 979851,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937860302.878, "dur": 2.773, + "args": { + "External id": 979852,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937860308.681, "dur": 0.547, + "args": { + "External id": 979853,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937860310.656, "dur": 0.543, + "args": { + "External id": 979854,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937860312.949, "dur": 1.802, + "args": { + "External id": 979855,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937860333.851, "dur": 58.198, + "args": { + "External id": 979856,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345937860432.015, "dur": 141.005, + "args": { + "External id": 979857,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 2448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937860449.211, "dur": 3.849, + "args": { + "External id": 979858,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345937860459.482, "dur": 12.193, + "args": { + "External id": 979859,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345937860464.513, "dur": 6.677, + "args": { + "External id": 979860,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 2451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937860468.943, "dur": 0.657, + "args": { + "External id": 979861,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345937860479.324, "dur": 32.889, + "args": { + "External id": 979862,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937860482.080, "dur": 0.651, + "args": { + "External id": 979863,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937860484.632, "dur": 0.654, + "args": { + "External id": 979864,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937860487.238, "dur": 3.270, + "args": { + "External id": 979865,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937860492.188, "dur": 0.365, + "args": { + "External id": 979866,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937860494.126, "dur": 0.474, + "args": { + "External id": 979867,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937860497.896, "dur": 0.435, + "args": { + "External id": 979868,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937860500.259, "dur": 0.443, + "args": { + "External id": 979869,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937860502.308, "dur": 0.394, + "args": { + "External id": 979870,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937860506.126, "dur": 0.309, + "args": { + "External id": 979871,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937860526.651, "dur": 37.314, + "args": { + "External id": 979872,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345937860623.755, "dur": 148.706, + "args": { + "External id": 979873,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 2464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345937860658.359, "dur": 109.788, + "args": { + "External id": 979874,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2465, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345937860671.781, "dur": 90.257, + "args": { + "External id": 979875,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 2466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345937860793.114, "dur": 2.182, + "args": { + "External id": 979876,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2467, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937860886.192, "dur": 2071.307, + "args": { + "External id": 979877,"Sequence number": 10552284, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2468 + } + }, + { + "ph": "f", "id": 182, "pid": 2338710, "tid": 2379450, "ts": 6345937860886.192, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937861033.928, "dur": 170.374, + "args": { + "External id": 979878,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 2469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345937861261.075, "dur": 45.496, + "args": { + "External id": 979879,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 2470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345937861326.982, "dur": 58.423, + "args": { + "External id": 979880,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 2471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937861399.693, "dur": 35.194, + "args": { + "External id": 979881,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937861442.411, "dur": 35.972, + "args": { + "External id": 979882,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937861485.567, "dur": 29.525, + "args": { + "External id": 979883,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937861523.390, "dur": 31.526, + "args": { + "External id": 979884,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345937861586.434, "dur": 26.269, + "args": { + "External id": 979885,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 2476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345937861634.656, "dur": 32.820, + "args": { + "External id": 979886,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345937861694.261, "dur": 22.621, + "args": { + "External id": 979887,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345937861734.241, "dur": 16.834, + "args": { + "External id": 979888,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937861760.220, "dur": 41.564, + "args": { + "External id": 979889,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937861806.424, "dur": 34.923, + "args": { + "External id": 979890,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345937861877.647, "dur": 396.234, + "args": { + "External id": 979891,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937861968.198, "dur": 6.729, + "args": { + "External id": 979892,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937861977.134, "dur": 3.464, + "args": { + "External id": 979893,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937861982.244, "dur": 3.247, + "args": { + "External id": 979894,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937861986.778, "dur": 2.498, + "args": { + "External id": 979895,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937862125.412, "dur": 11.992, + "args": { + "External id": 979896,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937862131.916, "dur": 4.290, + "args": { + "External id": 979897,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345937862139.832, "dur": 40.249, + "args": { + "External id": 979898,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937862147.435, "dur": 4.409, + "args": { + "External id": 979899,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937862182.516, "dur": 2.224, + "args": { + "External id": 979900,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937862183.932, "dur": 0.748, + "args": { + "External id": 979901,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345937862186.396, "dur": 19.792, + "args": { + "External id": 979902,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937862189.048, "dur": 0.592, + "args": { + "External id": 979903,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345937862319.982, "dur": 36.578, + "args": { + "External id": 979904,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345937862379.952, "dur": 18.209, + "args": { + "External id": 979905,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937862407.670, "dur": 59.633, + "args": { + "External id": 979906,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937862475.055, "dur": 46.903, + "args": { + "External id": 979907,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937862533.648, "dur": 24.794, + "args": { + "External id": 979908,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937862565.751, "dur": 34.478, + "args": { + "External id": 979909,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937862609.146, "dur": 31.506, + "args": { + "External id": 979910,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937862648.093, "dur": 34.250, + "args": { + "External id": 979911,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345937862708.684, "dur": 29.944, + "args": { + "External id": 979912,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 2503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345937862779.495, "dur": 29.137, + "args": { + "External id": 979913,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345937862828.301, "dur": 20.777, + "args": { + "External id": 979914,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345937862866.711, "dur": 19.529, + "args": { + "External id": 979915,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345937862903.571, "dur": 19.117, + "args": { + "External id": 979916,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 2507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937863028.369, "dur": 20.660, + "args": { + "External id": 979917,"Record function id": 0, "Ev Idx": 2508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937863033.952, "dur": 13.635, + "args": { + "External id": 979918,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937863039.442, "dur": 6.482, + "args": { + "External id": 979919,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937863040.904, "dur": 4.757, + "args": { + "External id": 979920,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937863093.398, "dur": 9.309, + "args": { + "External id": 979921,"Record function id": 0, "Ev Idx": 2512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937863096.334, "dur": 5.541, + "args": { + "External id": 979922,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937863098.168, "dur": 2.644, + "args": { + "External id": 979923,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937863098.961, "dur": 1.638, + "args": { + "External id": 979924,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937863106.915, "dur": 8.017, + "args": { + "External id": 979925,"Record function id": 0, "Ev Idx": 2516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937863108.630, "dur": 5.759, + "args": { + "External id": 979926,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937863109.289, "dur": 4.463, + "args": { + "External id": 979927,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937863110.181, "dur": 3.465, + "args": { + "External id": 979928,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937863118.927, "dur": 5.039, + "args": { + "External id": 979929,"Record function id": 0, "Ev Idx": 2520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937863120.336, "dur": 3.121, + "args": { + "External id": 979930,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937863121.088, "dur": 1.880, + "args": { + "External id": 979931,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937863121.914, "dur": 0.947, + "args": { + "External id": 979932,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937863127.829, "dur": 4.894, + "args": { + "External id": 979933,"Record function id": 0, "Ev Idx": 2524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937863129.630, "dur": 2.631, + "args": { + "External id": 979934,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937863130.458, "dur": 1.285, + "args": { + "External id": 979935,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937863130.907, "dur": 0.715, + "args": { + "External id": 979936,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937863136.587, "dur": 4.924, + "args": { + "External id": 979937,"Record function id": 0, "Ev Idx": 2528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937863138.172, "dur": 2.816, + "args": { + "External id": 979938,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937863138.949, "dur": 1.519, + "args": { + "External id": 979939,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937863139.661, "dur": 0.687, + "args": { + "External id": 979940,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937863145.745, "dur": 5.270, + "args": { + "External id": 979941,"Record function id": 0, "Ev Idx": 2532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937863147.704, "dur": 2.837, + "args": { + "External id": 979942,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937863148.623, "dur": 1.425, + "args": { + "External id": 979943,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937863148.981, "dur": 0.924, + "args": { + "External id": 979944,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937863154.901, "dur": 7.664, + "args": { + "External id": 979945,"Record function id": 0, "Ev Idx": 2536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937863156.555, "dur": 5.528, + "args": { + "External id": 979946,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937863157.364, "dur": 4.224, + "args": { + "External id": 979947,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937863160.536, "dur": 0.962, + "args": { + "External id": 979948,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937863166.388, "dur": 4.588, + "args": { + "External id": 979949,"Record function id": 0, "Ev Idx": 2540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937863168.009, "dur": 2.485, + "args": { + "External id": 979950,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937863168.600, "dur": 1.402, + "args": { + "External id": 979951,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937863168.966, "dur": 0.921, + "args": { + "External id": 979952,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937863175.725, "dur": 64322.926, + "args": { + "External id": 979953,"Record function id": 0, "Sequence number": 10552283, "Fwd thread id": 1, "Ev Idx": 2544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937863177.101, "dur": 64309.406, + "args": { + "External id": 979954,"Sequence number": 10552283, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2545 + } + }, + { + "ph": "f", "id": 183, "pid": 2338710, "tid": 2379450, "ts": 6345937863177.101, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.20)", "pid": 2338710, "tid": 2379450, + "ts": 6345937863216.074, "dur": 48.154, + "args": { + "External id": 979955,"Record function id": 0, "Ev Idx": 2546 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.20)", "pid": 2338710, "tid": 2379450, + "ts": 6345937863273.768, "dur": 71.904, + "args": { + "External id": 979956,"Record function id": 0, "Ev Idx": 2547 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.20)", "pid": 2338710, "tid": 2379450, + "ts": 6345937863352.422, "dur": 64123.348, + "args": { + "External id": 979957,"Record function id": 0, "Ev Idx": 2548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937863459.167, "dur": 9.842, + "args": { + "External id": 979958,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937863480.087, "dur": 7.262, + "args": { + "External id": 979959,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345937863504.186, "dur": 62781.298, + "args": { + "External id": 979960,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345937863522.849, "dur": 62745.174, + "args": { + "External id": 979961,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937863620.475, "dur": 22.414, + "args": { + "External id": 979962,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345937863670.000, "dur": 62537.536, + "args": { + "External id": 979963,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 2554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345937863674.323, "dur": 62531.996, + "args": { + "External id": 979964,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 2555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937863681.380, "dur": 10.949, + "args": { + "External id": 979965,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937863697.683, "dur": 62501.684, + "args": { + "External id": 979966,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 2557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937926423.573, "dur": 17.061, + "args": { + "External id": 979967,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 2558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937926429.029, "dur": 11.039, + "args": { + "External id": 979968,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345937926511.972, "dur": 490.021, + "args": { + "External id": 979969,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 2560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345937926558.294, "dur": 436.775, + "args": { + "External id": 979970,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2561, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345937926575.428, "dur": 411.881, + "args": { + "External id": 979971,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 2562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345937927092.670, "dur": 5.365, + "args": { + "External id": 979972,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2563, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937927187.925, "dur": 9.566, + "args": { + "External id": 979973,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937927259.573, "dur": 2.265, + "args": { + "External id": 979974,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937927282.105, "dur": 4.565, + "args": { + "External id": 979975,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937927302.102, "dur": 1.057, + "args": { + "External id": 979976,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937927324.368, "dur": 1.291, + "args": { + "External id": 979977,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937927339.713, "dur": 1.012, + "args": { + "External id": 979978,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937927355.313, "dur": 3.425, + "args": { + "External id": 979979,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937927373.553, "dur": 2.541, + "args": { + "External id": 979980,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937927394.000, "dur": 2.356, + "args": { + "External id": 979981,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937927523.425, "dur": 3427.237, + "args": { + "External id": 979982,"Record function id": 0, "Ev Idx": 2573 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.19)", "pid": 2338710, "tid": 2379450, + "ts": 6345937927548.841, "dur": 1278.792, + "args": { + "External id": 979983,"Record function id": 0, "Ev Idx": 2574 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.19)", "pid": 2338710, "tid": 2379450, + "ts": 6345937927567.015, "dur": 401.367, + "args": { + "External id": 979984,"Record function id": 0, "Ev Idx": 2575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937927670.292, "dur": 6.260, + "args": { + "External id": 979985,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937927680.266, "dur": 1.075, + "args": { + "External id": 979986,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937927683.682, "dur": 3.083, + "args": { + "External id": 979987,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937927688.996, "dur": 1.291, + "args": { + "External id": 979988,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937927692.116, "dur": 1.156, + "args": { + "External id": 979989,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937927695.017, "dur": 0.938, + "args": { + "External id": 979990,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937927697.867, "dur": 2.305, + "args": { + "External id": 979991,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937927704.352, "dur": 0.959, + "args": { + "External id": 979992,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937927706.931, "dur": 0.741, + "args": { + "External id": 979993,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937927709.013, "dur": 0.965, + "args": { + "External id": 979994,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345937927730.712, "dur": 200.210, + "args": { + "External id": 979995,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345937927751.073, "dur": 173.129, + "args": { + "External id": 979996,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937927772.149, "dur": 19.004, + "args": { + "External id": 979997,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345937927798.990, "dur": 87.951, + "args": { + "External id": 979998,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 2589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345937927803.450, "dur": 82.952, + "args": { + "External id": 979999,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 2590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937927809.113, "dur": 7.526, + "args": { + "External id": 980000,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937927818.840, "dur": 66.223, + "args": { + "External id": 980001,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 2592 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.18", "pid": 2338710, "tid": 2379450, + "ts": 6345937928138.620, "dur": 678.539, + "args": { + "External id": 980002,"Record function id": 0, "Ev Idx": 2593 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.18)", "pid": 2338710, "tid": 2379450, + "ts": 6345937928160.698, "dur": 643.045, + "args": { + "External id": 980003,"Record function id": 0, "Ev Idx": 2594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937928228.844, "dur": 7.741, + "args": { + "External id": 980004,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345937928254.443, "dur": 40.751, + "args": { + "External id": 980005,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937928260.848, "dur": 2.010, + "args": { + "External id": 980006,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937928265.155, "dur": 1.072, + "args": { + "External id": 980007,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937928268.287, "dur": 0.760, + "args": { + "External id": 980008,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937928270.991, "dur": 0.685, + "args": { + "External id": 980009,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937928274.189, "dur": 0.690, + "args": { + "External id": 980010,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937928277.390, "dur": 3.283, + "args": { + "External id": 980011,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937928282.633, "dur": 0.479, + "args": { + "External id": 980012,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937928285.151, "dur": 0.473, + "args": { + "External id": 980013,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937928287.829, "dur": 0.513, + "args": { + "External id": 980014,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937928306.605, "dur": 54.493, + "args": { + "External id": 980015,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345937928398.525, "dur": 135.920, + "args": { + "External id": 980016,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 2607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937928412.695, "dur": 4.344, + "args": { + "External id": 980017,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345937928423.593, "dur": 12.988, + "args": { + "External id": 980018,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345937928428.965, "dur": 7.112, + "args": { + "External id": 980019,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 2610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937928433.682, "dur": 0.704, + "args": { + "External id": 980020,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345937928444.552, "dur": 32.545, + "args": { + "External id": 980021,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937928447.283, "dur": 0.724, + "args": { + "External id": 980022,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937928450.373, "dur": 0.310, + "args": { + "External id": 980023,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937928452.604, "dur": 3.334, + "args": { + "External id": 980024,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937928457.765, "dur": 0.497, + "args": { + "External id": 980025,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937928460.358, "dur": 0.444, + "args": { + "External id": 980026,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937928463.122, "dur": 0.434, + "args": { + "External id": 980027,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937928465.947, "dur": 0.546, + "args": { + "External id": 980028,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937928468.735, "dur": 0.409, + "args": { + "External id": 980029,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937928471.574, "dur": 0.541, + "args": { + "External id": 980030,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937928489.239, "dur": 35.987, + "args": { + "External id": 980031,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345937928587.048, "dur": 137.652, + "args": { + "External id": 980032,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 2623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345937928616.550, "dur": 103.910, + "args": { + "External id": 980033,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2624, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345937928628.699, "dur": 86.389, + "args": { + "External id": 980034,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 2625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345937928743.871, "dur": 2.196, + "args": { + "External id": 980035,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2626, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937928835.868, "dur": 2088.575, + "args": { + "External id": 980036,"Sequence number": 10552282, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2627 + } + }, + { + "ph": "f", "id": 184, "pid": 2338710, "tid": 2379450, "ts": 6345937928835.868, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937928965.416, "dur": 201.117, + "args": { + "External id": 980037,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 2628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345937929224.193, "dur": 49.598, + "args": { + "External id": 980038,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 2629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345937929294.662, "dur": 64.761, + "args": { + "External id": 980039,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 2630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937929373.454, "dur": 37.593, + "args": { + "External id": 980040,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937929418.845, "dur": 36.137, + "args": { + "External id": 980041,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937929463.215, "dur": 31.781, + "args": { + "External id": 980042,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937929503.585, "dur": 33.839, + "args": { + "External id": 980043,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345937929567.074, "dur": 25.209, + "args": { + "External id": 980044,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 2635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345937929618.223, "dur": 33.025, + "args": { + "External id": 980045,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345937929676.548, "dur": 22.469, + "args": { + "External id": 980046,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345937929715.824, "dur": 19.674, + "args": { + "External id": 980047,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937929746.690, "dur": 44.438, + "args": { + "External id": 980048,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937929795.669, "dur": 37.676, + "args": { + "External id": 980049,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345937929868.767, "dur": 393.780, + "args": { + "External id": 980050,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937929977.539, "dur": 10.509, + "args": { + "External id": 980051,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937929990.880, "dur": 3.200, + "args": { + "External id": 980052,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937929995.665, "dur": 2.514, + "args": { + "External id": 980053,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937930000.003, "dur": 2.999, + "args": { + "External id": 980054,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937930122.430, "dur": 7.295, + "args": { + "External id": 980055,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937930125.574, "dur": 3.749, + "args": { + "External id": 980056,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345937930132.532, "dur": 45.606, + "args": { + "External id": 980057,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937930139.643, "dur": 4.561, + "args": { + "External id": 980058,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937930180.653, "dur": 2.701, + "args": { + "External id": 980059,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937930182.503, "dur": 0.750, + "args": { + "External id": 980060,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345937930185.587, "dur": 16.732, + "args": { + "External id": 980061,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937930188.481, "dur": 0.541, + "args": { + "External id": 980062,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345937930307.762, "dur": 34.568, + "args": { + "External id": 980063,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345937930362.954, "dur": 18.974, + "args": { + "External id": 980064,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937930392.915, "dur": 59.473, + "args": { + "External id": 980065,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937930460.836, "dur": 49.723, + "args": { + "External id": 980066,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937930522.601, "dur": 26.961, + "args": { + "External id": 980067,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937930557.644, "dur": 36.470, + "args": { + "External id": 980068,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937930603.048, "dur": 31.449, + "args": { + "External id": 980069,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937930643.146, "dur": 36.142, + "args": { + "External id": 980070,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345937930698.916, "dur": 28.287, + "args": { + "External id": 980071,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 2662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345937930749.183, "dur": 28.398, + "args": { + "External id": 980072,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345937930796.997, "dur": 21.611, + "args": { + "External id": 980073,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345937930837.317, "dur": 19.643, + "args": { + "External id": 980074,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345937930872.386, "dur": 18.769, + "args": { + "External id": 980075,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 2666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937930976.643, "dur": 17.771, + "args": { + "External id": 980076,"Record function id": 0, "Ev Idx": 2667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937930981.298, "dur": 12.076, + "args": { + "External id": 980077,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937930986.063, "dur": 6.279, + "args": { + "External id": 980078,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937930988.028, "dur": 4.173, + "args": { + "External id": 980079,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937930999.268, "dur": 6.898, + "args": { + "External id": 980080,"Record function id": 0, "Ev Idx": 2671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937931001.188, "dur": 4.460, + "args": { + "External id": 980081,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937931002.443, "dur": 2.538, + "args": { + "External id": 980082,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937931003.339, "dur": 1.523, + "args": { + "External id": 980083,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937931032.083, "dur": 11.372, + "args": { + "External id": 980084,"Record function id": 0, "Ev Idx": 2675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937931034.242, "dur": 8.467, + "args": { + "External id": 980085,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937931036.227, "dur": 5.321, + "args": { + "External id": 980086,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937931037.062, "dur": 4.267, + "args": { + "External id": 980087,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937931047.683, "dur": 5.931, + "args": { + "External id": 980088,"Record function id": 0, "Ev Idx": 2679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937931049.508, "dur": 3.599, + "args": { + "External id": 980089,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937931050.640, "dur": 1.893, + "args": { + "External id": 980090,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937931051.334, "dur": 1.120, + "args": { + "External id": 980091,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937931095.820, "dur": 9.072, + "args": { + "External id": 980092,"Record function id": 0, "Ev Idx": 2683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937931098.730, "dur": 5.426, + "args": { + "External id": 980093,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937931100.362, "dur": 2.790, + "args": { + "External id": 980094,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937931101.483, "dur": 1.474, + "args": { + "External id": 980095,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937931109.375, "dur": 6.070, + "args": { + "External id": 980096,"Record function id": 0, "Ev Idx": 2687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937931110.999, "dur": 3.906, + "args": { + "External id": 980097,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937931112.014, "dur": 2.261, + "args": { + "External id": 980098,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937931113.101, "dur": 1.048, + "args": { + "External id": 980099,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937931119.446, "dur": 5.304, + "args": { + "External id": 980100,"Record function id": 0, "Ev Idx": 2691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937931121.446, "dur": 2.803, + "args": { + "External id": 980101,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937931122.274, "dur": 1.471, + "args": { + "External id": 980102,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937931122.803, "dur": 0.865, + "args": { + "External id": 980103,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937931128.595, "dur": 5.232, + "args": { + "External id": 980104,"Record function id": 0, "Ev Idx": 2695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937931130.174, "dur": 3.115, + "args": { + "External id": 980105,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937931131.307, "dur": 1.458, + "args": { + "External id": 980106,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937931131.844, "dur": 0.793, + "args": { + "External id": 980107,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937931137.575, "dur": 4.947, + "args": { + "External id": 980108,"Record function id": 0, "Ev Idx": 2699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937931139.072, "dur": 2.946, + "args": { + "External id": 980109,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937931139.680, "dur": 1.835, + "args": { + "External id": 980110,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937931140.581, "dur": 0.801, + "args": { + "External id": 980111,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937931147.402, "dur": 62383.303, + "args": { + "External id": 980112,"Record function id": 0, "Sequence number": 10552281, "Fwd thread id": 1, "Ev Idx": 2703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937931149.071, "dur": 62369.995, + "args": { + "External id": 980113,"Sequence number": 10552281, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2704 + } + }, + { + "ph": "f", "id": 185, "pid": 2338710, "tid": 2379450, "ts": 6345937931149.071, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.19)", "pid": 2338710, "tid": 2379450, + "ts": 6345937931187.213, "dur": 47.346, + "args": { + "External id": 980114,"Record function id": 0, "Ev Idx": 2705 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.19)", "pid": 2338710, "tid": 2379450, + "ts": 6345937931243.649, "dur": 75.158, + "args": { + "External id": 980115,"Record function id": 0, "Ev Idx": 2706 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.19)", "pid": 2338710, "tid": 2379450, + "ts": 6345937931325.583, "dur": 62183.751, + "args": { + "External id": 980116,"Record function id": 0, "Ev Idx": 2707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937931436.494, "dur": 9.063, + "args": { + "External id": 980117,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937931457.730, "dur": 8.024, + "args": { + "External id": 980118,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345937931484.083, "dur": 60918.993, + "args": { + "External id": 980119,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345937931501.822, "dur": 60883.608, + "args": { + "External id": 980120,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937931632.455, "dur": 22.799, + "args": { + "External id": 980121,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345937931680.491, "dur": 60649.242, + "args": { + "External id": 980122,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 2713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345937931685.217, "dur": 60641.962, + "args": { + "External id": 980123,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 2714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937931691.125, "dur": 22.779, + "args": { + "External id": 980124,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937931723.474, "dur": 60600.888, + "args": { + "External id": 980125,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 2716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937992541.670, "dur": 17.941, + "args": { + "External id": 980126,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 2717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937992547.511, "dur": 11.614, + "args": { + "External id": 980127,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345937992603.009, "dur": 502.010, + "args": { + "External id": 980128,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 2719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345937992640.297, "dur": 456.278, + "args": { + "External id": 980129,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2720, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345937992657.055, "dur": 428.808, + "args": { + "External id": 980130,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 2721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345937993136.899, "dur": 4.246, + "args": { + "External id": 980131,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2722, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937993228.038, "dur": 8.707, + "args": { + "External id": 980132,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937993294.513, "dur": 2.853, + "args": { + "External id": 980133,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937993318.040, "dur": 4.710, + "args": { + "External id": 980134,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937993337.570, "dur": 1.148, + "args": { + "External id": 980135,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937993353.399, "dur": 1.246, + "args": { + "External id": 980136,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937993369.259, "dur": 1.197, + "args": { + "External id": 980137,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937993384.717, "dur": 4.662, + "args": { + "External id": 980138,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937993403.579, "dur": 2.874, + "args": { + "External id": 980139,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937993429.914, "dur": 1.276, + "args": { + "External id": 980140,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937993547.979, "dur": 3444.627, + "args": { + "External id": 980141,"Record function id": 0, "Ev Idx": 2732 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.18)", "pid": 2338710, "tid": 2379450, + "ts": 6345937993571.065, "dur": 1304.923, + "args": { + "External id": 980142,"Record function id": 0, "Ev Idx": 2733 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.18)", "pid": 2338710, "tid": 2379450, + "ts": 6345937993595.690, "dur": 395.595, + "args": { + "External id": 980143,"Record function id": 0, "Ev Idx": 2734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937993701.220, "dur": 6.313, + "args": { + "External id": 980144,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937993711.177, "dur": 1.670, + "args": { + "External id": 980145,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937993715.047, "dur": 3.655, + "args": { + "External id": 980146,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937993720.491, "dur": 1.089, + "args": { + "External id": 980147,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937993723.203, "dur": 0.957, + "args": { + "External id": 980148,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937993726.020, "dur": 1.093, + "args": { + "External id": 980149,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937993729.105, "dur": 2.089, + "args": { + "External id": 980150,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937993732.837, "dur": 1.063, + "args": { + "External id": 980151,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937993735.944, "dur": 1.129, + "args": { + "External id": 980152,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937993738.937, "dur": 1.062, + "args": { + "External id": 980153,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345937993761.104, "dur": 193.279, + "args": { + "External id": 980154,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345937993781.295, "dur": 166.933, + "args": { + "External id": 980155,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937993799.476, "dur": 20.723, + "args": { + "External id": 980156,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345937993825.773, "dur": 83.647, + "args": { + "External id": 980157,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 2748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345937993829.294, "dur": 79.733, + "args": { + "External id": 980158,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 2749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937993834.576, "dur": 6.338, + "args": { + "External id": 980159,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937993843.258, "dur": 64.962, + "args": { + "External id": 980160,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 2751 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.17", "pid": 2338710, "tid": 2379450, + "ts": 6345937994157.032, "dur": 709.578, + "args": { + "External id": 980161,"Record function id": 0, "Ev Idx": 2752 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.17)", "pid": 2338710, "tid": 2379450, + "ts": 6345937994180.726, "dur": 670.938, + "args": { + "External id": 980162,"Record function id": 0, "Ev Idx": 2753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937994250.855, "dur": 8.208, + "args": { + "External id": 980163,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345937994276.622, "dur": 40.000, + "args": { + "External id": 980164,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937994283.384, "dur": 2.175, + "args": { + "External id": 980165,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937994288.356, "dur": 0.726, + "args": { + "External id": 980166,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937994291.219, "dur": 0.735, + "args": { + "External id": 980167,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937994294.029, "dur": 0.612, + "args": { + "External id": 980168,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937994296.709, "dur": 0.961, + "args": { + "External id": 980169,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937994299.888, "dur": 2.952, + "args": { + "External id": 980170,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937994304.727, "dur": 0.508, + "args": { + "External id": 980171,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937994307.454, "dur": 0.394, + "args": { + "External id": 980172,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937994310.002, "dur": 0.600, + "args": { + "External id": 980173,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937994334.858, "dur": 62.524, + "args": { + "External id": 980174,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345937994437.764, "dur": 141.431, + "args": { + "External id": 980175,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 2766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937994451.471, "dur": 4.435, + "args": { + "External id": 980176,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345937994462.557, "dur": 12.752, + "args": { + "External id": 980177,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345937994467.652, "dur": 6.995, + "args": { + "External id": 980178,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 2769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937994472.236, "dur": 0.703, + "args": { + "External id": 980179,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345937994483.298, "dur": 31.814, + "args": { + "External id": 980180,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937994486.234, "dur": 0.470, + "args": { + "External id": 980181,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937994489.065, "dur": 0.624, + "args": { + "External id": 980182,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937994491.760, "dur": 3.193, + "args": { + "External id": 980183,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937994496.989, "dur": 0.627, + "args": { + "External id": 980184,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937994499.745, "dur": 0.536, + "args": { + "External id": 980185,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937994502.575, "dur": 0.454, + "args": { + "External id": 980186,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937994505.073, "dur": 0.423, + "args": { + "External id": 980187,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937994507.293, "dur": 0.612, + "args": { + "External id": 980188,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937994509.577, "dur": 0.483, + "args": { + "External id": 980189,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937994529.196, "dur": 41.011, + "args": { + "External id": 980190,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345937994636.109, "dur": 136.006, + "args": { + "External id": 980191,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 2782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345937994663.418, "dur": 104.329, + "args": { + "External id": 980192,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2783, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345937994675.155, "dur": 87.573, + "args": { + "External id": 980193,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 2784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345937994787.904, "dur": 1.864, + "args": { + "External id": 980194,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2785, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937994884.569, "dur": 2085.720, + "args": { + "External id": 980195,"Sequence number": 10552280, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2786 + } + }, + { + "ph": "f", "id": 186, "pid": 2338710, "tid": 2379450, "ts": 6345937994884.569, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937995033.146, "dur": 181.283, + "args": { + "External id": 980196,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 2787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345937995267.035, "dur": 48.434, + "args": { + "External id": 980197,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 2788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345937995338.674, "dur": 62.771, + "args": { + "External id": 980198,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 2789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937995415.820, "dur": 38.987, + "args": { + "External id": 980199,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937995463.611, "dur": 38.954, + "args": { + "External id": 980200,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937995511.281, "dur": 33.743, + "args": { + "External id": 980201,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937995553.696, "dur": 34.105, + "args": { + "External id": 980202,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345937995615.617, "dur": 26.249, + "args": { + "External id": 980203,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 2794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345937995662.402, "dur": 33.207, + "args": { + "External id": 980204,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345937995719.122, "dur": 23.161, + "args": { + "External id": 980205,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345937995756.670, "dur": 18.721, + "args": { + "External id": 980206,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937995785.080, "dur": 45.077, + "args": { + "External id": 980207,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937995834.404, "dur": 38.222, + "args": { + "External id": 980208,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345937995908.853, "dur": 403.418, + "args": { + "External id": 980209,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937996039.339, "dur": 11.362, + "args": { + "External id": 980210,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937996089.519, "dur": 6.118, + "args": { + "External id": 980211,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937996097.389, "dur": 2.605, + "args": { + "External id": 980212,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937996101.996, "dur": 2.392, + "args": { + "External id": 980213,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937996170.967, "dur": 6.134, + "args": { + "External id": 980214,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937996173.271, "dur": 3.583, + "args": { + "External id": 980215,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345937996180.101, "dur": 45.464, + "args": { + "External id": 980216,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937996187.181, "dur": 4.237, + "args": { + "External id": 980217,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345937996227.762, "dur": 2.956, + "args": { + "External id": 980218,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937996229.528, "dur": 1.061, + "args": { + "External id": 980219,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345937996232.952, "dur": 18.979, + "args": { + "External id": 980220,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937996235.414, "dur": 0.710, + "args": { + "External id": 980221,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345937996360.828, "dur": 35.940, + "args": { + "External id": 980222,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345937996416.461, "dur": 19.611, + "args": { + "External id": 980223,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937996447.162, "dur": 58.339, + "args": { + "External id": 980224,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937996513.762, "dur": 45.933, + "args": { + "External id": 980225,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937996572.519, "dur": 27.321, + "args": { + "External id": 980226,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937996607.785, "dur": 36.834, + "args": { + "External id": 980227,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937996653.758, "dur": 34.261, + "args": { + "External id": 980228,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345937996696.141, "dur": 39.188, + "args": { + "External id": 980229,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345937996757.287, "dur": 31.477, + "args": { + "External id": 980230,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 2821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345937996806.157, "dur": 28.068, + "args": { + "External id": 980231,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345937996849.357, "dur": 20.195, + "args": { + "External id": 980232,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345937996888.708, "dur": 16.047, + "args": { + "External id": 980233,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345937996918.240, "dur": 18.030, + "args": { + "External id": 980234,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 2825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937997037.757, "dur": 60.295, + "args": { + "External id": 980235,"Record function id": 0, "Ev Idx": 2826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937997042.663, "dur": 52.194, + "args": { + "External id": 980236,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937997047.857, "dur": 44.736, + "args": { + "External id": 980237,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937997050.102, "dur": 41.868, + "args": { + "External id": 980238,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937997106.589, "dur": 7.576, + "args": { + "External id": 980239,"Record function id": 0, "Ev Idx": 2830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937997108.830, "dur": 4.835, + "args": { + "External id": 980240,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937997110.245, "dur": 2.819, + "args": { + "External id": 980241,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937997111.760, "dur": 1.199, + "args": { + "External id": 980242,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937997118.394, "dur": 7.430, + "args": { + "External id": 980243,"Record function id": 0, "Ev Idx": 2834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937997120.111, "dur": 5.239, + "args": { + "External id": 980244,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937997120.927, "dur": 3.921, + "args": { + "External id": 980245,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937997121.435, "dur": 3.302, + "args": { + "External id": 980246,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937997130.030, "dur": 4.314, + "args": { + "External id": 980247,"Record function id": 0, "Ev Idx": 2838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937997131.454, "dur": 2.415, + "args": { + "External id": 980248,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937997132.046, "dur": 1.331, + "args": { + "External id": 980249,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937997132.404, "dur": 0.856, + "args": { + "External id": 980250,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937997138.282, "dur": 4.910, + "args": { + "External id": 980251,"Record function id": 0, "Ev Idx": 2842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937997139.753, "dur": 2.956, + "args": { + "External id": 980252,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937997140.809, "dur": 1.295, + "args": { + "External id": 980253,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937997141.240, "dur": 0.778, + "args": { + "External id": 980254,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937997147.064, "dur": 4.997, + "args": { + "External id": 980255,"Record function id": 0, "Ev Idx": 2846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937997148.470, "dur": 3.117, + "args": { + "External id": 980256,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937997149.042, "dur": 2.062, + "args": { + "External id": 980257,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937997150.055, "dur": 0.927, + "args": { + "External id": 980258,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937997155.975, "dur": 4.200, + "args": { + "External id": 980259,"Record function id": 0, "Ev Idx": 2850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937997157.308, "dur": 2.362, + "args": { + "External id": 980260,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937997157.870, "dur": 1.348, + "args": { + "External id": 980261,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937997158.425, "dur": 0.684, + "args": { + "External id": 980262,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937997164.008, "dur": 4.295, + "args": { + "External id": 980263,"Record function id": 0, "Ev Idx": 2854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937997165.509, "dur": 2.335, + "args": { + "External id": 980264,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937997166.084, "dur": 1.299, + "args": { + "External id": 980265,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937997166.423, "dur": 0.873, + "args": { + "External id": 980266,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937997172.085, "dur": 4.331, + "args": { + "External id": 980267,"Record function id": 0, "Ev Idx": 2858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345937997173.420, "dur": 2.495, + "args": { + "External id": 980268,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937997174.036, "dur": 1.385, + "args": { + "External id": 980269,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345937997174.601, "dur": 0.712, + "args": { + "External id": 980270,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937997181.234, "dur": 60987.422, + "args": { + "External id": 980271,"Record function id": 0, "Sequence number": 10552279, "Fwd thread id": 1, "Ev Idx": 2862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345937997182.917, "dur": 60972.587, + "args": { + "External id": 980272,"Sequence number": 10552279, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2863 + } + }, + { + "ph": "f", "id": 187, "pid": 2338710, "tid": 2379450, "ts": 6345937997182.917, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.18)", "pid": 2338710, "tid": 2379450, + "ts": 6345937997221.774, "dur": 44.301, + "args": { + "External id": 980273,"Record function id": 0, "Ev Idx": 2864 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.18)", "pid": 2338710, "tid": 2379450, + "ts": 6345937997274.799, "dur": 72.602, + "args": { + "External id": 980274,"Record function id": 0, "Ev Idx": 2865 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.18)", "pid": 2338710, "tid": 2379450, + "ts": 6345937997353.565, "dur": 60790.898, + "args": { + "External id": 980275,"Record function id": 0, "Ev Idx": 2866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937997458.604, "dur": 8.642, + "args": { + "External id": 980276,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345937997478.724, "dur": 8.493, + "args": { + "External id": 980277,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345937997505.578, "dur": 59657.030, + "args": { + "External id": 980278,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345937997522.025, "dur": 59623.369, + "args": { + "External id": 980279,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345937997658.953, "dur": 22.656, + "args": { + "External id": 980280,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345937997706.655, "dur": 59375.632, + "args": { + "External id": 980281,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 2872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345937997711.432, "dur": 59369.091, + "args": { + "External id": 980282,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 2873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345937997717.426, "dur": 10.922, + "args": { + "External id": 980283,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345937997731.691, "dur": 59316.717, + "args": { + "External id": 980284,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 2875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938057296.498, "dur": 18.031, + "args": { + "External id": 980285,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 2876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938057302.456, "dur": 11.567, + "args": { + "External id": 980286,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345938057352.403, "dur": 361.586, + "args": { + "External id": 980287,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 2878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938057388.577, "dur": 319.945, + "args": { + "External id": 980288,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2879, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345938057403.736, "dur": 297.631, + "args": { + "External id": 980289,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 2880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938057737.624, "dur": 2.445, + "args": { + "External id": 980290,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2881, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938057805.490, "dur": 8.884, + "args": { + "External id": 980291,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938057871.195, "dur": 2.874, + "args": { + "External id": 980292,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938057892.789, "dur": 4.176, + "args": { + "External id": 980293,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938057911.989, "dur": 1.225, + "args": { + "External id": 980294,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938057928.529, "dur": 1.978, + "args": { + "External id": 980295,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938057945.673, "dur": 1.702, + "args": { + "External id": 980296,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938057961.529, "dur": 3.161, + "args": { + "External id": 980297,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938057980.388, "dur": 2.488, + "args": { + "External id": 980298,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938057996.769, "dur": 1.094, + "args": { + "External id": 980299,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938058188.730, "dur": 3463.736, + "args": { + "External id": 980300,"Record function id": 0, "Ev Idx": 2891 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.17)", "pid": 2338710, "tid": 2379450, + "ts": 6345938058211.603, "dur": 1279.903, + "args": { + "External id": 980301,"Record function id": 0, "Ev Idx": 2892 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.17)", "pid": 2338710, "tid": 2379450, + "ts": 6345938058230.530, "dur": 399.242, + "args": { + "External id": 980302,"Record function id": 0, "Ev Idx": 2893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938058331.390, "dur": 6.161, + "args": { + "External id": 980303,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938058341.644, "dur": 1.099, + "args": { + "External id": 980304,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938058345.451, "dur": 3.556, + "args": { + "External id": 980305,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938058350.800, "dur": 0.999, + "args": { + "External id": 980306,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938058353.839, "dur": 1.053, + "args": { + "External id": 980307,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938058356.870, "dur": 0.922, + "args": { + "External id": 980308,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938058359.816, "dur": 2.558, + "args": { + "External id": 980309,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938058364.353, "dur": 1.184, + "args": { + "External id": 980310,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938058367.231, "dur": 1.183, + "args": { + "External id": 980311,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938058370.327, "dur": 0.823, + "args": { + "External id": 980312,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938058406.777, "dur": 185.698, + "args": { + "External id": 980313,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938058427.517, "dur": 159.468, + "args": { + "External id": 980314,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938058446.671, "dur": 18.710, + "args": { + "External id": 980315,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345938058470.905, "dur": 81.255, + "args": { + "External id": 980316,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 2907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938058473.860, "dur": 77.936, + "args": { + "External id": 980317,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 2908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938058479.290, "dur": 6.000, + "args": { + "External id": 980318,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938058487.805, "dur": 63.138, + "args": { + "External id": 980319,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 2910 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.16", "pid": 2338710, "tid": 2379450, + "ts": 6345938058724.381, "dur": 757.660, + "args": { + "External id": 980320,"Record function id": 0, "Ev Idx": 2911 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.16)", "pid": 2338710, "tid": 2379450, + "ts": 6345938058744.919, "dur": 721.635, + "args": { + "External id": 980321,"Record function id": 0, "Ev Idx": 2912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938058812.589, "dur": 7.290, + "args": { + "External id": 980322,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345938058836.529, "dur": 37.923, + "args": { + "External id": 980323,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938058842.307, "dur": 1.948, + "args": { + "External id": 980324,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938058846.785, "dur": 0.713, + "args": { + "External id": 980325,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938058849.822, "dur": 0.502, + "args": { + "External id": 980326,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938058852.815, "dur": 0.640, + "args": { + "External id": 980327,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938058855.687, "dur": 0.773, + "args": { + "External id": 980328,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938058858.465, "dur": 2.605, + "args": { + "External id": 980329,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938058863.141, "dur": 0.651, + "args": { + "External id": 980330,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938058866.016, "dur": 0.480, + "args": { + "External id": 980331,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938058868.679, "dur": 0.400, + "args": { + "External id": 980332,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938058885.927, "dur": 52.534, + "args": { + "External id": 980333,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345938058976.210, "dur": 216.901, + "args": { + "External id": 980334,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 2925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938058988.766, "dur": 4.180, + "args": { + "External id": 980335,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345938058999.665, "dur": 35.730, + "args": { + "External id": 980336,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345938059004.759, "dur": 30.075, + "args": { + "External id": 980337,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 2928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938059030.103, "dur": 1.307, + "args": { + "External id": 980338,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345938059045.699, "dur": 78.392, + "args": { + "External id": 980339,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938059049.201, "dur": 0.587, + "args": { + "External id": 980340,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938059092.819, "dur": 1.209, + "args": { + "External id": 980341,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938059099.200, "dur": 2.961, + "args": { + "External id": 980342,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938059104.020, "dur": 0.780, + "args": { + "External id": 980343,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938059107.040, "dur": 0.517, + "args": { + "External id": 980344,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938059110.014, "dur": 0.777, + "args": { + "External id": 980345,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938059113.012, "dur": 0.579, + "args": { + "External id": 980346,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938059115.658, "dur": 0.489, + "args": { + "External id": 980347,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938059118.200, "dur": 0.457, + "args": { + "External id": 980348,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938059138.526, "dur": 44.905, + "args": { + "External id": 980349,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345938059250.010, "dur": 137.116, + "args": { + "External id": 980350,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 2941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938059277.407, "dur": 105.575, + "args": { + "External id": 980351,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2942, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345938059288.688, "dur": 88.462, + "args": { + "External id": 980352,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 2943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938059403.363, "dur": 1.987, + "args": { + "External id": 980353,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2944, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938059499.305, "dur": 2131.188, + "args": { + "External id": 980354,"Sequence number": 10552278, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2945 + } + }, + { + "ph": "f", "id": 188, "pid": 2338710, "tid": 2379450, "ts": 6345938059499.305, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938059625.348, "dur": 125.604, + "args": { + "External id": 980355,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 2946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345938059795.293, "dur": 47.254, + "args": { + "External id": 980356,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 2947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345938059864.208, "dur": 61.981, + "args": { + "External id": 980357,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 2948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938059940.347, "dur": 37.274, + "args": { + "External id": 980358,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938059986.524, "dur": 64.559, + "args": { + "External id": 980359,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938060105.995, "dur": 41.855, + "args": { + "External id": 980360,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938060157.384, "dur": 34.392, + "args": { + "External id": 980361,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345938060223.816, "dur": 28.334, + "args": { + "External id": 980362,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 2953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345938060294.119, "dur": 32.971, + "args": { + "External id": 980363,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345938060354.693, "dur": 27.092, + "args": { + "External id": 980364,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345938060398.533, "dur": 17.734, + "args": { + "External id": 980365,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938060430.789, "dur": 47.961, + "args": { + "External id": 980366,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938060483.157, "dur": 39.463, + "args": { + "External id": 980367,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345938060558.071, "dur": 330.488, + "args": { + "External id": 980368,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938060650.623, "dur": 6.618, + "args": { + "External id": 980369,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938060659.998, "dur": 2.976, + "args": { + "External id": 980370,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938060671.708, "dur": 2.419, + "args": { + "External id": 980371,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938060675.859, "dur": 2.388, + "args": { + "External id": 980372,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345938060730.591, "dur": 20.537, + "args": { + "External id": 980373,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938060746.380, "dur": 3.899, + "args": { + "External id": 980374,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345938060754.237, "dur": 50.401, + "args": { + "External id": 980375,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938060761.968, "dur": 4.375, + "args": { + "External id": 980376,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345938060807.101, "dur": 2.637, + "args": { + "External id": 980377,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938060808.736, "dur": 0.929, + "args": { + "External id": 980378,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345938060811.731, "dur": 16.501, + "args": { + "External id": 980379,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938060813.905, "dur": 0.610, + "args": { + "External id": 980380,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345938060932.869, "dur": 32.027, + "args": { + "External id": 980381,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345938060983.615, "dur": 23.111, + "args": { + "External id": 980382,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938061040.655, "dur": 101.031, + "args": { + "External id": 980383,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938061153.742, "dur": 50.625, + "args": { + "External id": 980384,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938061219.235, "dur": 27.534, + "args": { + "External id": 980385,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938061254.717, "dur": 38.028, + "args": { + "External id": 980386,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938061302.418, "dur": 33.771, + "args": { + "External id": 980387,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938061344.317, "dur": 35.346, + "args": { + "External id": 980388,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345938061403.026, "dur": 30.272, + "args": { + "External id": 980389,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 2980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345938061451.532, "dur": 29.104, + "args": { + "External id": 980390,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345938061496.561, "dur": 21.425, + "args": { + "External id": 980391,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345938061536.466, "dur": 21.921, + "args": { + "External id": 980392,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345938061572.887, "dur": 19.347, + "args": { + "External id": 980393,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 2984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938061679.070, "dur": 18.617, + "args": { + "External id": 980394,"Record function id": 0, "Ev Idx": 2985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938061683.200, "dur": 13.422, + "args": { + "External id": 980395,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938061688.411, "dur": 7.230, + "args": { + "External id": 980396,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938061690.543, "dur": 4.943, + "args": { + "External id": 980397,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938061702.807, "dur": 6.194, + "args": { + "External id": 980398,"Record function id": 0, "Ev Idx": 2989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938061704.433, "dur": 4.047, + "args": { + "External id": 980399,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938061705.322, "dur": 2.606, + "args": { + "External id": 980400,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938061706.608, "dur": 1.196, + "args": { + "External id": 980401,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938061713.332, "dur": 8.172, + "args": { + "External id": 980402,"Record function id": 0, "Ev Idx": 2993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938061714.981, "dur": 6.025, + "args": { + "External id": 980403,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938061715.536, "dur": 4.990, + "args": { + "External id": 980404,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938061716.101, "dur": 4.347, + "args": { + "External id": 980405,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938061725.642, "dur": 5.019, + "args": { + "External id": 980406,"Record function id": 0, "Ev Idx": 2997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938061727.066, "dur": 3.139, + "args": { + "External id": 980407,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938061728.023, "dur": 1.665, + "args": { + "External id": 980408,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938061728.570, "dur": 1.042, + "args": { + "External id": 980409,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938061734.651, "dur": 4.892, + "args": { + "External id": 980410,"Record function id": 0, "Ev Idx": 3001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938061736.226, "dur": 2.843, + "args": { + "External id": 980411,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938061737.128, "dur": 1.473, + "args": { + "External id": 980412,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938061737.593, "dur": 0.888, + "args": { + "External id": 980413,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938061743.224, "dur": 4.830, + "args": { + "External id": 980414,"Record function id": 0, "Ev Idx": 3005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938061744.968, "dur": 2.640, + "args": { + "External id": 980415,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938061745.581, "dur": 1.528, + "args": { + "External id": 980416,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938061746.307, "dur": 0.663, + "args": { + "External id": 980417,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938061751.768, "dur": 4.158, + "args": { + "External id": 980418,"Record function id": 0, "Ev Idx": 3009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938061753.101, "dur": 2.360, + "args": { + "External id": 980419,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938061753.682, "dur": 1.304, + "args": { + "External id": 980420,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938061754.042, "dur": 0.806, + "args": { + "External id": 980421,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938061759.510, "dur": 3.789, + "args": { + "External id": 980422,"Record function id": 0, "Ev Idx": 3013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938061760.774, "dur": 2.087, + "args": { + "External id": 980423,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938061761.314, "dur": 1.075, + "args": { + "External id": 980424,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938061761.631, "dur": 0.679, + "args": { + "External id": 980425,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938061767.118, "dur": 4.299, + "args": { + "External id": 980426,"Record function id": 0, "Ev Idx": 3017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938061768.426, "dur": 2.535, + "args": { + "External id": 980427,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938061768.987, "dur": 1.479, + "args": { + "External id": 980428,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938061769.592, "dur": 0.740, + "args": { + "External id": 980429,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938061776.043, "dur": 64707.562, + "args": { + "External id": 980430,"Record function id": 0, "Sequence number": 10552277, "Fwd thread id": 1, "Ev Idx": 3021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938061777.449, "dur": 64695.928, + "args": { + "External id": 980431,"Sequence number": 10552277, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3022 + } + }, + { + "ph": "f", "id": 189, "pid": 2338710, "tid": 2379450, "ts": 6345938061777.449, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.17)", "pid": 2338710, "tid": 2379450, + "ts": 6345938061811.661, "dur": 48.903, + "args": { + "External id": 980432,"Record function id": 0, "Ev Idx": 3023 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.17)", "pid": 2338710, "tid": 2379450, + "ts": 6345938061869.661, "dur": 72.604, + "args": { + "External id": 980433,"Record function id": 0, "Ev Idx": 3024 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.17)", "pid": 2338710, "tid": 2379450, + "ts": 6345938061949.111, "dur": 64513.792, + "args": { + "External id": 980434,"Record function id": 0, "Ev Idx": 3025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938062117.386, "dur": 9.906, + "args": { + "External id": 980435,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938062140.850, "dur": 8.536, + "args": { + "External id": 980436,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345938062167.599, "dur": 63181.418, + "args": { + "External id": 980437,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345938062184.216, "dur": 63147.902, + "args": { + "External id": 980438,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938062297.273, "dur": 23.430, + "args": { + "External id": 980439,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345938062345.746, "dur": 62928.244, + "args": { + "External id": 980440,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 3031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938062350.067, "dur": 62922.649, + "args": { + "External id": 980441,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 3032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938062355.749, "dur": 9.874, + "args": { + "External id": 980442,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938062368.303, "dur": 62897.332, + "args": { + "External id": 980443,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 3034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938125486.162, "dur": 17.205, + "args": { + "External id": 980444,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 3035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938125491.722, "dur": 11.101, + "args": { + "External id": 980445,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345938125543.669, "dur": 498.403, + "args": { + "External id": 980446,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 3037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938125579.484, "dur": 454.375, + "args": { + "External id": 980447,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3038, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345938125594.119, "dur": 410.885, + "args": { + "External id": 980448,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 3039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938126105.271, "dur": 3.941, + "args": { + "External id": 980449,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3040, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938126191.374, "dur": 9.298, + "args": { + "External id": 980450,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938126258.803, "dur": 2.885, + "args": { + "External id": 980451,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938126282.404, "dur": 4.670, + "args": { + "External id": 980452,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938126302.392, "dur": 0.986, + "args": { + "External id": 980453,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938126319.063, "dur": 1.209, + "args": { + "External id": 980454,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938126335.400, "dur": 1.350, + "args": { + "External id": 980455,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938126351.459, "dur": 4.341, + "args": { + "External id": 980456,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938126369.552, "dur": 2.611, + "args": { + "External id": 980457,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938126386.323, "dur": 1.294, + "args": { + "External id": 980458,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938126502.581, "dur": 3470.415, + "args": { + "External id": 980459,"Record function id": 0, "Ev Idx": 3050 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.16)", "pid": 2338710, "tid": 2379450, + "ts": 6345938126524.359, "dur": 1337.877, + "args": { + "External id": 980460,"Record function id": 0, "Ev Idx": 3051 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.16)", "pid": 2338710, "tid": 2379450, + "ts": 6345938126540.928, "dur": 406.315, + "args": { + "External id": 980461,"Record function id": 0, "Ev Idx": 3052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938126639.334, "dur": 5.471, + "args": { + "External id": 980462,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938126648.669, "dur": 4.635, + "args": { + "External id": 980463,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938126655.286, "dur": 3.984, + "args": { + "External id": 980464,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938126661.263, "dur": 0.933, + "args": { + "External id": 980465,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938126664.319, "dur": 1.098, + "args": { + "External id": 980466,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938126667.457, "dur": 0.932, + "args": { + "External id": 980467,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938126670.487, "dur": 2.368, + "args": { + "External id": 980468,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938126674.748, "dur": 1.065, + "args": { + "External id": 980469,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938126677.723, "dur": 1.125, + "args": { + "External id": 980470,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938126680.806, "dur": 1.083, + "args": { + "External id": 980471,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938126722.102, "dur": 188.911, + "args": { + "External id": 980472,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938126741.917, "dur": 163.534, + "args": { + "External id": 980473,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938126760.847, "dur": 22.892, + "args": { + "External id": 980474,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345938126789.667, "dur": 80.486, + "args": { + "External id": 980475,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 3066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938126793.577, "dur": 76.003, + "args": { + "External id": 980476,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 3067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938126798.905, "dur": 6.015, + "args": { + "External id": 980477,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938126807.522, "dur": 60.956, + "args": { + "External id": 980478,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 3069 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.15", "pid": 2338710, "tid": 2379450, + "ts": 6345938127111.312, "dur": 741.175, + "args": { + "External id": 980479,"Record function id": 0, "Ev Idx": 3070 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.15)", "pid": 2338710, "tid": 2379450, + "ts": 6345938127132.145, "dur": 705.122, + "args": { + "External id": 980480,"Record function id": 0, "Ev Idx": 3071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938127232.740, "dur": 20.398, + "args": { + "External id": 980481,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345938127272.641, "dur": 38.996, + "args": { + "External id": 980482,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938127279.167, "dur": 2.107, + "args": { + "External id": 980483,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938127284.300, "dur": 0.460, + "args": { + "External id": 980484,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938127286.906, "dur": 0.455, + "args": { + "External id": 980485,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938127289.588, "dur": 0.500, + "args": { + "External id": 980486,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938127292.073, "dur": 0.688, + "args": { + "External id": 980487,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938127294.568, "dur": 2.632, + "args": { + "External id": 980488,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938127299.324, "dur": 0.457, + "args": { + "External id": 980489,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938127301.899, "dur": 0.476, + "args": { + "External id": 980490,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938127304.535, "dur": 0.402, + "args": { + "External id": 980491,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938127322.771, "dur": 61.931, + "args": { + "External id": 980492,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345938127425.678, "dur": 138.256, + "args": { + "External id": 980493,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 3084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938127439.757, "dur": 4.379, + "args": { + "External id": 980494,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345938127450.951, "dur": 12.086, + "args": { + "External id": 980495,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345938127456.269, "dur": 6.277, + "args": { + "External id": 980496,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 3087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938127460.402, "dur": 0.664, + "args": { + "External id": 980497,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345938127470.947, "dur": 30.932, + "args": { + "External id": 980498,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938127473.617, "dur": 0.667, + "args": { + "External id": 980499,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938127476.451, "dur": 0.551, + "args": { + "External id": 980500,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938127479.070, "dur": 2.657, + "args": { + "External id": 980501,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938127483.579, "dur": 0.396, + "args": { + "External id": 980502,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938127486.239, "dur": 0.373, + "args": { + "External id": 980503,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938127488.575, "dur": 0.458, + "args": { + "External id": 980504,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938127491.053, "dur": 0.495, + "args": { + "External id": 980505,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938127493.903, "dur": 0.453, + "args": { + "External id": 980506,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938127496.515, "dur": 0.485, + "args": { + "External id": 980507,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938127517.007, "dur": 38.224, + "args": { + "External id": 980508,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345938127621.153, "dur": 138.470, + "args": { + "External id": 980509,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 3100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938127650.749, "dur": 104.540, + "args": { + "External id": 980510,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3101, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345938127662.602, "dur": 87.536, + "args": { + "External id": 980511,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 3102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938127775.431, "dur": 1.932, + "args": { + "External id": 980512,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3103, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938127870.213, "dur": 2078.456, + "args": { + "External id": 980513,"Sequence number": 10552276, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3104 + } + }, + { + "ph": "f", "id": 190, "pid": 2338710, "tid": 2379450, "ts": 6345938127870.213, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938127994.853, "dur": 192.234, + "args": { + "External id": 980514,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 3105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345938128258.823, "dur": 52.661, + "args": { + "External id": 980515,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 3106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345938128335.025, "dur": 69.042, + "args": { + "External id": 980516,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 3107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938128418.382, "dur": 39.468, + "args": { + "External id": 980517,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938128466.500, "dur": 38.768, + "args": { + "External id": 980518,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938128513.880, "dur": 32.850, + "args": { + "External id": 980519,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938128555.599, "dur": 32.873, + "args": { + "External id": 980520,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345938128617.941, "dur": 25.163, + "args": { + "External id": 980521,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 3112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345938128665.765, "dur": 33.332, + "args": { + "External id": 980522,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345938128722.905, "dur": 25.575, + "args": { + "External id": 980523,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345938128762.900, "dur": 16.213, + "args": { + "External id": 980524,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938128788.826, "dur": 48.714, + "args": { + "External id": 980525,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938128841.786, "dur": 38.736, + "args": { + "External id": 980526,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345938128914.732, "dur": 398.670, + "args": { + "External id": 980527,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938129004.551, "dur": 26.372, + "args": { + "External id": 980528,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938129034.853, "dur": 3.570, + "args": { + "External id": 980529,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938129040.289, "dur": 2.509, + "args": { + "External id": 980530,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938129044.301, "dur": 2.232, + "args": { + "External id": 980531,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345938129158.628, "dur": 8.187, + "args": { + "External id": 980532,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938129161.337, "dur": 4.377, + "args": { + "External id": 980533,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345938129169.760, "dur": 44.708, + "args": { + "External id": 980534,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938129177.457, "dur": 4.979, + "args": { + "External id": 980535,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345938129216.814, "dur": 2.093, + "args": { + "External id": 980536,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938129218.053, "dur": 0.764, + "args": { + "External id": 980537,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345938129220.452, "dur": 20.345, + "args": { + "External id": 980538,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938129223.498, "dur": 0.754, + "args": { + "External id": 980539,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345938129357.933, "dur": 34.590, + "args": { + "External id": 980540,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345938129411.566, "dur": 18.774, + "args": { + "External id": 980541,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938129441.153, "dur": 58.953, + "args": { + "External id": 980542,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938129508.590, "dur": 46.270, + "args": { + "External id": 980543,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938129566.384, "dur": 25.234, + "args": { + "External id": 980544,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938129598.791, "dur": 36.132, + "args": { + "External id": 980545,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938129644.201, "dur": 32.072, + "args": { + "External id": 980546,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938129684.287, "dur": 35.918, + "args": { + "External id": 980547,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345938129740.659, "dur": 27.191, + "args": { + "External id": 980548,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 3139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345938129785.055, "dur": 27.087, + "args": { + "External id": 980549,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345938129827.882, "dur": 19.188, + "args": { + "External id": 980550,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345938129866.204, "dur": 16.284, + "args": { + "External id": 980551,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345938129897.074, "dur": 17.831, + "args": { + "External id": 980552,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 3143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938129998.687, "dur": 38.072, + "args": { + "External id": 980553,"Record function id": 0, "Ev Idx": 3144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938130003.137, "dur": 31.652, + "args": { + "External id": 980554,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938130025.850, "dur": 7.408, + "args": { + "External id": 980555,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938130028.149, "dur": 4.739, + "args": { + "External id": 980556,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938130044.390, "dur": 76.570, + "args": { + "External id": 980557,"Record function id": 0, "Ev Idx": 3148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938130046.561, "dur": 72.775, + "args": { + "External id": 980558,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938130047.827, "dur": 69.971, + "args": { + "External id": 980559,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938130113.488, "dur": 3.881, + "args": { + "External id": 980560,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938130128.134, "dur": 8.699, + "args": { + "External id": 980561,"Record function id": 0, "Ev Idx": 3152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938130129.960, "dur": 6.417, + "args": { + "External id": 980562,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938130131.336, "dur": 4.564, + "args": { + "External id": 980563,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938130132.036, "dur": 3.773, + "args": { + "External id": 980564,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938130140.961, "dur": 4.782, + "args": { + "External id": 980565,"Record function id": 0, "Ev Idx": 3156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938130142.590, "dur": 2.667, + "args": { + "External id": 980566,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938130143.414, "dur": 1.349, + "args": { + "External id": 980567,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938130143.823, "dur": 0.852, + "args": { + "External id": 980568,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938130149.428, "dur": 4.682, + "args": { + "External id": 980569,"Record function id": 0, "Ev Idx": 3160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938130150.768, "dur": 2.868, + "args": { + "External id": 980570,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938130151.537, "dur": 1.666, + "args": { + "External id": 980571,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938130152.258, "dur": 0.827, + "args": { + "External id": 980572,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938130157.973, "dur": 4.644, + "args": { + "External id": 980573,"Record function id": 0, "Ev Idx": 3164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938130159.707, "dur": 2.414, + "args": { + "External id": 980574,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938130160.422, "dur": 1.266, + "args": { + "External id": 980575,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938130160.886, "dur": 0.697, + "args": { + "External id": 980576,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938130166.341, "dur": 10.270, + "args": { + "External id": 980577,"Record function id": 0, "Ev Idx": 3168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938130171.351, "dur": 4.752, + "args": { + "External id": 980578,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938130171.918, "dur": 3.739, + "args": { + "External id": 980579,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938130174.752, "dur": 0.791, + "args": { + "External id": 980580,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938130180.599, "dur": 4.444, + "args": { + "External id": 980581,"Record function id": 0, "Ev Idx": 3172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938130182.057, "dur": 2.526, + "args": { + "External id": 980582,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938130182.865, "dur": 1.252, + "args": { + "External id": 980583,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938130183.238, "dur": 0.792, + "args": { + "External id": 980584,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938130188.824, "dur": 4.127, + "args": { + "External id": 980585,"Record function id": 0, "Ev Idx": 3176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938130190.048, "dur": 2.415, + "args": { + "External id": 980586,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938130190.610, "dur": 1.420, + "args": { + "External id": 980587,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938130191.250, "dur": 0.688, + "args": { + "External id": 980588,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938130197.780, "dur": 65101.840, + "args": { + "External id": 980589,"Record function id": 0, "Sequence number": 10552275, "Fwd thread id": 1, "Ev Idx": 3180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938130199.499, "dur": 65088.048, + "args": { + "External id": 980590,"Sequence number": 10552275, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3181 + } + }, + { + "ph": "f", "id": 191, "pid": 2338710, "tid": 2379450, "ts": 6345938130199.499, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.16)", "pid": 2338710, "tid": 2379450, + "ts": 6345938130238.865, "dur": 42.506, + "args": { + "External id": 980591,"Record function id": 0, "Ev Idx": 3182 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.16)", "pid": 2338710, "tid": 2379450, + "ts": 6345938130290.563, "dur": 68.053, + "args": { + "External id": 980592,"Record function id": 0, "Ev Idx": 3183 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.16)", "pid": 2338710, "tid": 2379450, + "ts": 6345938130365.277, "dur": 64911.179, + "args": { + "External id": 980593,"Record function id": 0, "Ev Idx": 3184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938130472.142, "dur": 8.959, + "args": { + "External id": 980594,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938130493.231, "dur": 8.182, + "args": { + "External id": 980595,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345938130520.392, "dur": 63692.451, + "args": { + "External id": 980596,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345938130536.007, "dur": 63659.474, + "args": { + "External id": 980597,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938130639.656, "dur": 27.147, + "args": { + "External id": 980598,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345938130692.034, "dur": 63444.325, + "args": { + "External id": 980599,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 3190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938130696.658, "dur": 63438.446, + "args": { + "External id": 980600,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 3191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938130702.627, "dur": 10.852, + "args": { + "External id": 980601,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938130716.447, "dur": 63411.819, + "args": { + "External id": 980602,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 3193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938194355.044, "dur": 16.591, + "args": { + "External id": 980603,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 3194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938194360.605, "dur": 10.601, + "args": { + "External id": 980604,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345938194421.113, "dur": 429.137, + "args": { + "External id": 980605,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 3196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938194460.391, "dur": 384.149, + "args": { + "External id": 980606,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3197, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345938194475.456, "dur": 361.912, + "args": { + "External id": 980607,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 3198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938194870.474, "dur": 2.516, + "args": { + "External id": 980608,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3199, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938194937.212, "dur": 8.604, + "args": { + "External id": 980609,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938195005.238, "dur": 23.300, + "args": { + "External id": 980610,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938195097.062, "dur": 5.391, + "args": { + "External id": 980611,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938195121.894, "dur": 1.038, + "args": { + "External id": 980612,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938195136.460, "dur": 1.135, + "args": { + "External id": 980613,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938195154.064, "dur": 1.308, + "args": { + "External id": 980614,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938195168.130, "dur": 3.373, + "args": { + "External id": 980615,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938195183.358, "dur": 2.665, + "args": { + "External id": 980616,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938195197.558, "dur": 0.899, + "args": { + "External id": 980617,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938195321.516, "dur": 3389.154, + "args": { + "External id": 980618,"Record function id": 0, "Ev Idx": 3209 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.15)", "pid": 2338710, "tid": 2379450, + "ts": 6345938195344.309, "dur": 1268.658, + "args": { + "External id": 980619,"Record function id": 0, "Ev Idx": 3210 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.15)", "pid": 2338710, "tid": 2379450, + "ts": 6345938195362.012, "dur": 408.768, + "args": { + "External id": 980620,"Record function id": 0, "Ev Idx": 3211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938195467.780, "dur": 6.329, + "args": { + "External id": 980621,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938195477.942, "dur": 0.991, + "args": { + "External id": 980622,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938195480.764, "dur": 3.275, + "args": { + "External id": 980623,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938195485.675, "dur": 0.855, + "args": { + "External id": 980624,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938195488.336, "dur": 1.203, + "args": { + "External id": 980625,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938195491.260, "dur": 0.938, + "args": { + "External id": 980626,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938195494.034, "dur": 1.999, + "args": { + "External id": 980627,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938195497.537, "dur": 0.769, + "args": { + "External id": 980628,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938195500.128, "dur": 0.952, + "args": { + "External id": 980629,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938195502.781, "dur": 0.751, + "args": { + "External id": 980630,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938195525.070, "dur": 211.224, + "args": { + "External id": 980631,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938195559.355, "dur": 170.937, + "args": { + "External id": 980632,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938195578.302, "dur": 21.224, + "args": { + "External id": 980633,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345938195605.715, "dur": 85.990, + "args": { + "External id": 980634,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 3225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938195609.243, "dur": 81.977, + "args": { + "External id": 980635,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 3226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938195615.300, "dur": 7.688, + "args": { + "External id": 980636,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938195625.147, "dur": 64.885, + "args": { + "External id": 980637,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 3228 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.14", "pid": 2338710, "tid": 2379450, + "ts": 6345938195862.424, "dur": 740.470, + "args": { + "External id": 980638,"Record function id": 0, "Ev Idx": 3229 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.14)", "pid": 2338710, "tid": 2379450, + "ts": 6345938195882.094, "dur": 702.661, + "args": { + "External id": 980639,"Record function id": 0, "Ev Idx": 3230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938195944.611, "dur": 6.574, + "args": { + "External id": 980640,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345938195967.856, "dur": 35.944, + "args": { + "External id": 980641,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938195974.121, "dur": 1.521, + "args": { + "External id": 980642,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938195978.004, "dur": 0.554, + "args": { + "External id": 980643,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938195980.611, "dur": 0.545, + "args": { + "External id": 980644,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938195983.262, "dur": 0.513, + "args": { + "External id": 980645,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938195985.678, "dur": 0.594, + "args": { + "External id": 980646,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938195988.191, "dur": 2.818, + "args": { + "External id": 980647,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938195993.099, "dur": 0.504, + "args": { + "External id": 980648,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938195995.578, "dur": 0.469, + "args": { + "External id": 980649,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938195998.051, "dur": 0.663, + "args": { + "External id": 980650,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938196036.355, "dur": 100.465, + "args": { + "External id": 980651,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345938196180.349, "dur": 140.454, + "args": { + "External id": 980652,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 3243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938196195.169, "dur": 5.453, + "args": { + "External id": 980653,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345938196207.466, "dur": 13.399, + "args": { + "External id": 980654,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345938196212.609, "dur": 7.777, + "args": { + "External id": 980655,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 3246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938196217.584, "dur": 0.949, + "args": { + "External id": 980656,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345938196230.080, "dur": 29.709, + "args": { + "External id": 980657,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938196232.859, "dur": 0.610, + "args": { + "External id": 980658,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938196235.404, "dur": 0.410, + "args": { + "External id": 980659,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938196237.532, "dur": 2.957, + "args": { + "External id": 980660,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938196242.119, "dur": 0.623, + "args": { + "External id": 980661,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938196244.461, "dur": 0.384, + "args": { + "External id": 980662,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938196246.642, "dur": 0.563, + "args": { + "External id": 980663,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938196249.204, "dur": 0.592, + "args": { + "External id": 980664,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938196251.945, "dur": 0.491, + "args": { + "External id": 980665,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938196254.393, "dur": 0.508, + "args": { + "External id": 980666,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938196273.052, "dur": 38.172, + "args": { + "External id": 980667,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345938196375.414, "dur": 134.623, + "args": { + "External id": 980668,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 3259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938196402.566, "dur": 103.012, + "args": { + "External id": 980669,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3260, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345938196414.295, "dur": 86.471, + "args": { + "External id": 980670,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 3261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938196525.152, "dur": 1.991, + "args": { + "External id": 980671,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3262, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938196621.085, "dur": 2065.256, + "args": { + "External id": 980672,"Sequence number": 10552274, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3263 + } + }, + { + "ph": "f", "id": 192, "pid": 2338710, "tid": 2379450, "ts": 6345938196621.085, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938196754.498, "dur": 130.176, + "args": { + "External id": 980673,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 3264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345938196927.740, "dur": 45.607, + "args": { + "External id": 980674,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 3265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345938196994.007, "dur": 134.358, + "args": { + "External id": 980675,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 3266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938197148.878, "dur": 44.791, + "args": { + "External id": 980676,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938197203.411, "dur": 38.290, + "args": { + "External id": 980677,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938197250.897, "dur": 32.193, + "args": { + "External id": 980678,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938197291.385, "dur": 32.410, + "args": { + "External id": 980679,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345938197355.006, "dur": 27.841, + "args": { + "External id": 980680,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 3271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345938197403.459, "dur": 30.094, + "args": { + "External id": 980681,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345938197454.923, "dur": 22.193, + "args": { + "External id": 980682,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345938197492.747, "dur": 17.368, + "args": { + "External id": 980683,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938197520.010, "dur": 43.866, + "args": { + "External id": 980684,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938197568.307, "dur": 39.201, + "args": { + "External id": 980685,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345938197641.990, "dur": 312.446, + "args": { + "External id": 980686,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938197731.975, "dur": 7.636, + "args": { + "External id": 980687,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938197742.078, "dur": 2.798, + "args": { + "External id": 980688,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938197746.217, "dur": 2.183, + "args": { + "External id": 980689,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938197749.530, "dur": 2.040, + "args": { + "External id": 980690,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345938197819.136, "dur": 6.827, + "args": { + "External id": 980691,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938197821.530, "dur": 3.962, + "args": { + "External id": 980692,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345938197828.495, "dur": 39.780, + "args": { + "External id": 980693,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938197834.957, "dur": 4.593, + "args": { + "External id": 980694,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345938197870.746, "dur": 2.755, + "args": { + "External id": 980695,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938197872.549, "dur": 0.769, + "args": { + "External id": 980696,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345938197875.526, "dur": 16.253, + "args": { + "External id": 980697,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938197877.726, "dur": 0.731, + "args": { + "External id": 980698,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345938197999.440, "dur": 103.149, + "args": { + "External id": 980699,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345938198127.395, "dur": 21.619, + "args": { + "External id": 980700,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938198160.420, "dur": 61.350, + "args": { + "External id": 980701,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938198230.444, "dur": 46.994, + "args": { + "External id": 980702,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938198290.009, "dur": 29.127, + "args": { + "External id": 980703,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938198326.280, "dur": 35.405, + "args": { + "External id": 980704,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938198370.621, "dur": 33.235, + "args": { + "External id": 980705,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938198411.952, "dur": 38.367, + "args": { + "External id": 980706,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345938198470.901, "dur": 29.761, + "args": { + "External id": 980707,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 3298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345938198517.579, "dur": 27.032, + "args": { + "External id": 980708,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345938198563.039, "dur": 19.060, + "args": { + "External id": 980709,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345938198599.476, "dur": 19.633, + "args": { + "External id": 980710,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345938198632.817, "dur": 17.963, + "args": { + "External id": 980711,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 3302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938198736.883, "dur": 18.059, + "args": { + "External id": 980712,"Record function id": 0, "Ev Idx": 3303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938198740.697, "dur": 13.074, + "args": { + "External id": 980713,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938198745.724, "dur": 7.096, + "args": { + "External id": 980714,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938198747.866, "dur": 4.814, + "args": { + "External id": 980715,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938198759.806, "dur": 5.516, + "args": { + "External id": 980716,"Record function id": 0, "Ev Idx": 3307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938198761.232, "dur": 3.575, + "args": { + "External id": 980717,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938198762.263, "dur": 2.050, + "args": { + "External id": 980718,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938198763.166, "dur": 1.000, + "args": { + "External id": 980719,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938198769.510, "dur": 7.562, + "args": { + "External id": 980720,"Record function id": 0, "Ev Idx": 3311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938198770.944, "dur": 5.662, + "args": { + "External id": 980721,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938198772.014, "dur": 4.090, + "args": { + "External id": 980722,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938198772.526, "dur": 3.480, + "args": { + "External id": 980723,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938198781.088, "dur": 3.902, + "args": { + "External id": 980724,"Record function id": 0, "Ev Idx": 3315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938198782.212, "dur": 2.301, + "args": { + "External id": 980725,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938198782.791, "dur": 1.255, + "args": { + "External id": 980726,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938198783.146, "dur": 0.818, + "args": { + "External id": 980727,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938198788.941, "dur": 4.628, + "args": { + "External id": 980728,"Record function id": 0, "Ev Idx": 3319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938198790.429, "dur": 2.693, + "args": { + "External id": 980729,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938198791.148, "dur": 1.510, + "args": { + "External id": 980730,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938198791.726, "dur": 0.797, + "args": { + "External id": 980731,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938198797.608, "dur": 4.320, + "args": { + "External id": 980732,"Record function id": 0, "Ev Idx": 3323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938198798.806, "dur": 2.653, + "args": { + "External id": 980733,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938198799.531, "dur": 1.423, + "args": { + "External id": 980734,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938198800.145, "dur": 0.678, + "args": { + "External id": 980735,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938198805.732, "dur": 4.333, + "args": { + "External id": 980736,"Record function id": 0, "Ev Idx": 3327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938198806.856, "dur": 2.735, + "args": { + "External id": 980737,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938198807.509, "dur": 1.557, + "args": { + "External id": 980738,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938198808.153, "dur": 0.781, + "args": { + "External id": 980739,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938198813.749, "dur": 4.423, + "args": { + "External id": 980740,"Record function id": 0, "Ev Idx": 3331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938198814.980, "dur": 2.736, + "args": { + "External id": 980741,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938198815.819, "dur": 1.416, + "args": { + "External id": 980742,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938198816.239, "dur": 0.903, + "args": { + "External id": 980743,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938198821.902, "dur": 4.293, + "args": { + "External id": 980744,"Record function id": 0, "Ev Idx": 3335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938198822.966, "dur": 2.748, + "args": { + "External id": 980745,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938198823.707, "dur": 1.534, + "args": { + "External id": 980746,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938198824.420, "dur": 0.713, + "args": { + "External id": 980747,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938198830.960, "dur": 64425.323, + "args": { + "External id": 980748,"Record function id": 0, "Sequence number": 10552273, "Fwd thread id": 1, "Ev Idx": 3339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938198832.411, "dur": 64412.732, + "args": { + "External id": 980749,"Sequence number": 10552273, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3340 + } + }, + { + "ph": "f", "id": 193, "pid": 2338710, "tid": 2379450, "ts": 6345938198832.411, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.15)", "pid": 2338710, "tid": 2379450, + "ts": 6345938198865.488, "dur": 45.585, + "args": { + "External id": 980750,"Record function id": 0, "Ev Idx": 3341 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.15)", "pid": 2338710, "tid": 2379450, + "ts": 6345938198919.740, "dur": 69.093, + "args": { + "External id": 980751,"Record function id": 0, "Ev Idx": 3342 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.15)", "pid": 2338710, "tid": 2379450, + "ts": 6345938198994.669, "dur": 64239.938, + "args": { + "External id": 980752,"Record function id": 0, "Ev Idx": 3343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938199171.109, "dur": 9.215, + "args": { + "External id": 980753,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938199193.292, "dur": 8.385, + "args": { + "External id": 980754,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345938199219.848, "dur": 62932.976, + "args": { + "External id": 980755,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345938199236.003, "dur": 62900.102, + "args": { + "External id": 980756,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938199384.555, "dur": 21.768, + "args": { + "External id": 980757,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345938199431.036, "dur": 62617.416, + "args": { + "External id": 980758,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 3349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938199435.481, "dur": 62611.649, + "args": { + "External id": 980759,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 3350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938199440.661, "dur": 11.305, + "args": { + "External id": 980760,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938199454.621, "dur": 62585.352, + "args": { + "External id": 980761,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 3352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938262292.734, "dur": 17.344, + "args": { + "External id": 980762,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 3353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938262298.554, "dur": 11.093, + "args": { + "External id": 980763,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345938262355.018, "dur": 432.110, + "args": { + "External id": 980764,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 3355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938262393.447, "dur": 386.567, + "args": { + "External id": 980765,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3356, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345938262408.251, "dur": 365.000, + "args": { + "External id": 980766,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 3357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938262813.612, "dur": 3.675, + "args": { + "External id": 980767,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3358, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938262890.407, "dur": 8.457, + "args": { + "External id": 980768,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938262956.898, "dur": 2.757, + "args": { + "External id": 980769,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938262979.217, "dur": 4.382, + "args": { + "External id": 980770,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938262997.448, "dur": 1.064, + "args": { + "External id": 980771,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938263037.045, "dur": 3.059, + "args": { + "External id": 980772,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938263092.012, "dur": 3.007, + "args": { + "External id": 980773,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938263111.450, "dur": 3.742, + "args": { + "External id": 980774,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938263137.117, "dur": 3.592, + "args": { + "External id": 980775,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938263153.967, "dur": 1.097, + "args": { + "External id": 980776,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938263277.380, "dur": 3400.565, + "args": { + "External id": 980777,"Record function id": 0, "Ev Idx": 3368 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.14)", "pid": 2338710, "tid": 2379450, + "ts": 6345938263300.982, "dur": 1296.281, + "args": { + "External id": 980778,"Record function id": 0, "Ev Idx": 3369 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.14)", "pid": 2338710, "tid": 2379450, + "ts": 6345938263320.587, "dur": 398.633, + "args": { + "External id": 980779,"Record function id": 0, "Ev Idx": 3370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938263430.245, "dur": 6.493, + "args": { + "External id": 980780,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938263440.071, "dur": 1.155, + "args": { + "External id": 980781,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938263443.776, "dur": 3.195, + "args": { + "External id": 980782,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938263448.671, "dur": 0.982, + "args": { + "External id": 980783,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938263451.463, "dur": 0.848, + "args": { + "External id": 980784,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938263454.037, "dur": 1.020, + "args": { + "External id": 980785,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938263457.049, "dur": 2.574, + "args": { + "External id": 980786,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938263460.976, "dur": 0.854, + "args": { + "External id": 980787,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938263463.714, "dur": 1.022, + "args": { + "External id": 980788,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938263466.196, "dur": 0.994, + "args": { + "External id": 980789,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938263487.824, "dur": 194.515, + "args": { + "External id": 980790,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938263507.429, "dur": 168.141, + "args": { + "External id": 980791,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938263527.779, "dur": 19.650, + "args": { + "External id": 980792,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345938263553.113, "dur": 84.428, + "args": { + "External id": 980793,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 3384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938263556.580, "dur": 80.431, + "args": { + "External id": 980794,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 3385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938263561.951, "dur": 5.842, + "args": { + "External id": 980795,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938263570.170, "dur": 66.131, + "args": { + "External id": 980796,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 3387 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.13", "pid": 2338710, "tid": 2379450, + "ts": 6345938263816.301, "dur": 771.648, + "args": { + "External id": 980797,"Record function id": 0, "Ev Idx": 3388 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.13)", "pid": 2338710, "tid": 2379450, + "ts": 6345938263837.589, "dur": 734.886, + "args": { + "External id": 980798,"Record function id": 0, "Ev Idx": 3389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938263902.492, "dur": 6.889, + "args": { + "External id": 980799,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345938263926.458, "dur": 41.566, + "args": { + "External id": 980800,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938263937.409, "dur": 1.496, + "args": { + "External id": 980801,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938263941.932, "dur": 1.235, + "args": { + "External id": 980802,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938263945.096, "dur": 0.616, + "args": { + "External id": 980803,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938263947.567, "dur": 0.423, + "args": { + "External id": 980804,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938263949.727, "dur": 0.732, + "args": { + "External id": 980805,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938263952.666, "dur": 2.884, + "args": { + "External id": 980806,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938263957.432, "dur": 0.600, + "args": { + "External id": 980807,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938263959.695, "dur": 0.657, + "args": { + "External id": 980808,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938263961.873, "dur": 0.412, + "args": { + "External id": 980809,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938263979.105, "dur": 116.248, + "args": { + "External id": 980810,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345938264142.192, "dur": 147.324, + "args": { + "External id": 980811,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 3402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938264157.772, "dur": 5.540, + "args": { + "External id": 980812,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345938264170.177, "dur": 13.040, + "args": { + "External id": 980813,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345938264175.432, "dur": 7.296, + "args": { + "External id": 980814,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 3405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938264179.869, "dur": 0.968, + "args": { + "External id": 980815,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345938264191.819, "dur": 31.154, + "args": { + "External id": 980816,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938264195.305, "dur": 0.509, + "args": { + "External id": 980817,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938264197.936, "dur": 0.337, + "args": { + "External id": 980818,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938264200.452, "dur": 2.774, + "args": { + "External id": 980819,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938264205.075, "dur": 0.462, + "args": { + "External id": 980820,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938264207.643, "dur": 0.387, + "args": { + "External id": 980821,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938264210.066, "dur": 0.610, + "args": { + "External id": 980822,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938264212.376, "dur": 0.565, + "args": { + "External id": 980823,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938264214.867, "dur": 0.526, + "args": { + "External id": 980824,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938264217.044, "dur": 0.408, + "args": { + "External id": 980825,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938264237.857, "dur": 42.409, + "args": { + "External id": 980826,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345938264350.693, "dur": 144.931, + "args": { + "External id": 980827,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 3418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938264378.391, "dur": 112.809, + "args": { + "External id": 980828,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3419, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345938264390.108, "dur": 95.259, + "args": { + "External id": 980829,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 3420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938264511.277, "dur": 2.137, + "args": { + "External id": 980830,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3421, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938264605.406, "dur": 2050.213, + "args": { + "External id": 980831,"Sequence number": 10552272, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3422 + } + }, + { + "ph": "f", "id": 194, "pid": 2338710, "tid": 2379450, "ts": 6345938264605.406, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938264728.127, "dur": 125.308, + "args": { + "External id": 980832,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 3423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345938264898.003, "dur": 45.140, + "args": { + "External id": 980833,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 3424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345938264964.635, "dur": 84.673, + "args": { + "External id": 980834,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 3425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938265109.878, "dur": 46.570, + "args": { + "External id": 980835,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938265165.608, "dur": 38.228, + "args": { + "External id": 980836,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938265212.844, "dur": 32.212, + "args": { + "External id": 980837,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938265252.873, "dur": 32.983, + "args": { + "External id": 980838,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345938265317.512, "dur": 29.732, + "args": { + "External id": 980839,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 3430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345938265368.123, "dur": 31.735, + "args": { + "External id": 980840,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345938265422.122, "dur": 23.287, + "args": { + "External id": 980841,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345938265459.347, "dur": 17.261, + "args": { + "External id": 980842,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938265486.457, "dur": 43.634, + "args": { + "External id": 980843,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938265534.228, "dur": 37.411, + "args": { + "External id": 980844,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345938265606.781, "dur": 319.376, + "args": { + "External id": 980845,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938265697.752, "dur": 6.512, + "args": { + "External id": 980846,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938265706.750, "dur": 2.903, + "args": { + "External id": 980847,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938265711.493, "dur": 2.484, + "args": { + "External id": 980848,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938265715.235, "dur": 2.478, + "args": { + "External id": 980849,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345938265767.159, "dur": 6.102, + "args": { + "External id": 980850,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938265769.623, "dur": 3.394, + "args": { + "External id": 980851,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345938265788.303, "dur": 50.608, + "args": { + "External id": 980852,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938265797.839, "dur": 4.925, + "args": { + "External id": 980853,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345938265841.358, "dur": 2.526, + "args": { + "External id": 980854,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938265842.948, "dur": 0.823, + "args": { + "External id": 980855,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345938265845.482, "dur": 17.142, + "args": { + "External id": 980856,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938265848.115, "dur": 0.792, + "args": { + "External id": 980857,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345938265965.747, "dur": 36.738, + "args": { + "External id": 980858,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345938266047.822, "dur": 62.670, + "args": { + "External id": 980859,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938266124.356, "dur": 65.652, + "args": { + "External id": 980860,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938266198.725, "dur": 52.286, + "args": { + "External id": 980861,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938266264.522, "dur": 27.079, + "args": { + "External id": 980862,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938266299.409, "dur": 37.125, + "args": { + "External id": 980863,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938266345.426, "dur": 32.077, + "args": { + "External id": 980864,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938266386.157, "dur": 34.202, + "args": { + "External id": 980865,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345938266441.785, "dur": 29.925, + "args": { + "External id": 980866,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 3457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345938266490.126, "dur": 28.695, + "args": { + "External id": 980867,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345938266534.503, "dur": 20.165, + "args": { + "External id": 980868,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345938266572.859, "dur": 16.166, + "args": { + "External id": 980869,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345938266602.878, "dur": 17.765, + "args": { + "External id": 980870,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 3461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938266703.422, "dur": 18.908, + "args": { + "External id": 980871,"Record function id": 0, "Ev Idx": 3462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938266707.517, "dur": 13.579, + "args": { + "External id": 980872,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938266712.573, "dur": 7.474, + "args": { + "External id": 980873,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938266715.144, "dur": 4.766, + "args": { + "External id": 980874,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938266727.145, "dur": 5.578, + "args": { + "External id": 980875,"Record function id": 0, "Ev Idx": 3466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938266728.404, "dur": 3.629, + "args": { + "External id": 980876,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938266729.268, "dur": 2.243, + "args": { + "External id": 980877,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938266730.129, "dur": 1.220, + "args": { + "External id": 980878,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938266736.782, "dur": 7.046, + "args": { + "External id": 980879,"Record function id": 0, "Ev Idx": 3470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938266738.128, "dur": 5.200, + "args": { + "External id": 980880,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938266738.844, "dur": 3.985, + "args": { + "External id": 980881,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938266739.239, "dur": 3.483, + "args": { + "External id": 980882,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938266747.917, "dur": 4.888, + "args": { + "External id": 980883,"Record function id": 0, "Ev Idx": 3474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938266749.547, "dur": 2.780, + "args": { + "External id": 980884,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938266750.146, "dur": 1.669, + "args": { + "External id": 980885,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938266750.875, "dur": 0.842, + "args": { + "External id": 980886,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938266756.814, "dur": 4.970, + "args": { + "External id": 980887,"Record function id": 0, "Ev Idx": 3478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938266757.953, "dur": 3.336, + "args": { + "External id": 980888,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938266758.911, "dur": 1.891, + "args": { + "External id": 980889,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938266759.727, "dur": 0.997, + "args": { + "External id": 980890,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938266765.581, "dur": 4.374, + "args": { + "External id": 980891,"Record function id": 0, "Ev Idx": 3482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938266766.895, "dur": 2.552, + "args": { + "External id": 980892,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938266767.465, "dur": 1.509, + "args": { + "External id": 980893,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938266768.138, "dur": 0.671, + "args": { + "External id": 980894,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938266773.667, "dur": 4.032, + "args": { + "External id": 980895,"Record function id": 0, "Ev Idx": 3486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938266774.889, "dur": 2.344, + "args": { + "External id": 980896,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938266775.636, "dur": 1.127, + "args": { + "External id": 980897,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938266775.980, "dur": 0.670, + "args": { + "External id": 980898,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938266781.349, "dur": 4.014, + "args": { + "External id": 980899,"Record function id": 0, "Ev Idx": 3490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938266782.781, "dur": 2.118, + "args": { + "External id": 980900,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938266783.330, "dur": 1.097, + "args": { + "External id": 980901,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938266783.654, "dur": 0.698, + "args": { + "External id": 980902,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938266789.011, "dur": 4.101, + "args": { + "External id": 980903,"Record function id": 0, "Ev Idx": 3494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938266790.080, "dur": 2.552, + "args": { + "External id": 980904,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938266790.850, "dur": 1.318, + "args": { + "External id": 980905,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938266791.300, "dur": 0.732, + "args": { + "External id": 980906,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938266797.852, "dur": 65815.611, + "args": { + "External id": 980907,"Record function id": 0, "Sequence number": 10552271, "Fwd thread id": 1, "Ev Idx": 3498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938266799.332, "dur": 65804.351, + "args": { + "External id": 980908,"Sequence number": 10552271, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3499 + } + }, + { + "ph": "f", "id": 195, "pid": 2338710, "tid": 2379450, "ts": 6345938266799.332, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.14)", "pid": 2338710, "tid": 2379450, + "ts": 6345938266832.884, "dur": 44.440, + "args": { + "External id": 980909,"Record function id": 0, "Ev Idx": 3500 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.14)", "pid": 2338710, "tid": 2379450, + "ts": 6345938266886.342, "dur": 68.559, + "args": { + "External id": 980910,"Record function id": 0, "Ev Idx": 3501 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.14)", "pid": 2338710, "tid": 2379450, + "ts": 6345938266961.711, "dur": 65632.207, + "args": { + "External id": 980911,"Record function id": 0, "Ev Idx": 3502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938267138.582, "dur": 10.815, + "args": { + "External id": 980912,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938267163.310, "dur": 8.117, + "args": { + "External id": 980913,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345938267190.318, "dur": 64253.710, + "args": { + "External id": 980914,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345938267207.549, "dur": 64219.569, + "args": { + "External id": 980915,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938267349.626, "dur": 24.463, + "args": { + "External id": 980916,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345938267399.173, "dur": 63968.011, + "args": { + "External id": 980917,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 3508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938267403.956, "dur": 63961.922, + "args": { + "External id": 980918,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 3509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938267409.762, "dur": 11.715, + "args": { + "External id": 980919,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938267424.448, "dur": 63934.524, + "args": { + "External id": 980920,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 3511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938331585.993, "dur": 15.970, + "args": { + "External id": 980921,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 3512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938331591.369, "dur": 10.030, + "args": { + "External id": 980922,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345938331647.935, "dur": 574.042, + "args": { + "External id": 980923,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 3514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938331684.453, "dur": 529.421, + "args": { + "External id": 980924,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3515, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345938331700.872, "dur": 504.153, + "args": { + "External id": 980925,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 3516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938332250.534, "dur": 2.915, + "args": { + "External id": 980926,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3517, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938332332.896, "dur": 9.112, + "args": { + "External id": 980927,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938332402.475, "dur": 1.566, + "args": { + "External id": 980928,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938332423.441, "dur": 4.710, + "args": { + "External id": 980929,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938332441.114, "dur": 1.307, + "args": { + "External id": 980930,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938332455.966, "dur": 1.079, + "args": { + "External id": 980931,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938332469.394, "dur": 1.358, + "args": { + "External id": 980932,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938332484.251, "dur": 3.017, + "args": { + "External id": 980933,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938332498.959, "dur": 2.706, + "args": { + "External id": 980934,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938332513.643, "dur": 0.913, + "args": { + "External id": 980935,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938332631.287, "dur": 3520.785, + "args": { + "External id": 980936,"Record function id": 0, "Ev Idx": 3527 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.13)", "pid": 2338710, "tid": 2379450, + "ts": 6345938332653.962, "dur": 1299.946, + "args": { + "External id": 980937,"Record function id": 0, "Ev Idx": 3528 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.13)", "pid": 2338710, "tid": 2379450, + "ts": 6345938332671.997, "dur": 472.165, + "args": { + "External id": 980938,"Record function id": 0, "Ev Idx": 3529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938332764.153, "dur": 6.273, + "args": { + "External id": 980939,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938332773.919, "dur": 1.371, + "args": { + "External id": 980940,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938332777.312, "dur": 3.271, + "args": { + "External id": 980941,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938332782.869, "dur": 0.935, + "args": { + "External id": 980942,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938332785.850, "dur": 0.912, + "args": { + "External id": 980943,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938332788.890, "dur": 0.979, + "args": { + "External id": 980944,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938332791.734, "dur": 2.836, + "args": { + "External id": 980945,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938332796.087, "dur": 0.953, + "args": { + "External id": 980946,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938332798.555, "dur": 0.911, + "args": { + "External id": 980947,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938332801.153, "dur": 0.870, + "args": { + "External id": 980948,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938332823.073, "dur": 275.629, + "args": { + "External id": 980949,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938332843.480, "dur": 207.729, + "args": { + "External id": 980950,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938332872.315, "dur": 20.923, + "args": { + "External id": 980951,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345938332899.166, "dur": 89.586, + "args": { + "External id": 980952,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 3543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938332902.700, "dur": 85.652, + "args": { + "External id": 980953,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 3544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938332908.806, "dur": 7.354, + "args": { + "External id": 980954,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938332918.322, "dur": 69.405, + "args": { + "External id": 980955,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 3546 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.12", "pid": 2338710, "tid": 2379450, + "ts": 6345938333247.825, "dur": 696.596, + "args": { + "External id": 980956,"Record function id": 0, "Ev Idx": 3547 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.12)", "pid": 2338710, "tid": 2379450, + "ts": 6345938333268.403, "dur": 661.387, + "args": { + "External id": 980957,"Record function id": 0, "Ev Idx": 3548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938333338.377, "dur": 8.478, + "args": { + "External id": 980958,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345938333368.551, "dur": 44.408, + "args": { + "External id": 980959,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938333375.151, "dur": 1.929, + "args": { + "External id": 980960,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938333379.467, "dur": 0.808, + "args": { + "External id": 980961,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938333382.229, "dur": 0.559, + "args": { + "External id": 980962,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938333384.689, "dur": 0.623, + "args": { + "External id": 980963,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938333386.907, "dur": 0.791, + "args": { + "External id": 980964,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938333389.549, "dur": 3.315, + "args": { + "External id": 980965,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938333394.842, "dur": 0.449, + "args": { + "External id": 980966,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938333404.000, "dur": 0.454, + "args": { + "External id": 980967,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938333406.472, "dur": 0.534, + "args": { + "External id": 980968,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938333430.617, "dur": 60.416, + "args": { + "External id": 980969,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345938333532.247, "dur": 131.461, + "args": { + "External id": 980970,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 3561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938333548.458, "dur": 3.608, + "args": { + "External id": 980971,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345938333558.759, "dur": 11.952, + "args": { + "External id": 980972,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345938333563.748, "dur": 6.471, + "args": { + "External id": 980973,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 3564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938333568.144, "dur": 0.656, + "args": { + "External id": 980974,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345938333579.096, "dur": 28.282, + "args": { + "External id": 980975,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938333581.885, "dur": 0.509, + "args": { + "External id": 980976,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938333584.179, "dur": 0.475, + "args": { + "External id": 980977,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938333586.864, "dur": 2.565, + "args": { + "External id": 980978,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938333591.273, "dur": 0.383, + "args": { + "External id": 980979,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938333593.604, "dur": 0.458, + "args": { + "External id": 980980,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938333595.758, "dur": 0.442, + "args": { + "External id": 980981,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938333597.793, "dur": 0.384, + "args": { + "External id": 980982,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938333599.863, "dur": 0.416, + "args": { + "External id": 980983,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938333602.257, "dur": 0.407, + "args": { + "External id": 980984,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938333619.108, "dur": 35.700, + "args": { + "External id": 980985,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345938333718.539, "dur": 134.095, + "args": { + "External id": 980986,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 3577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938333744.827, "dur": 103.667, + "args": { + "External id": 980987,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3578, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345938333756.650, "dur": 87.125, + "args": { + "External id": 980988,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 3579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938333869.652, "dur": 2.056, + "args": { + "External id": 980989,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3580, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938333962.344, "dur": 2164.977, + "args": { + "External id": 980990,"Sequence number": 10552270, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3581 + } + }, + { + "ph": "f", "id": 196, "pid": 2338710, "tid": 2379450, "ts": 6345938333962.344, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938334166.674, "dur": 134.894, + "args": { + "External id": 980991,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 3582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345938334349.436, "dur": 47.294, + "args": { + "External id": 980992,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 3583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345938334460.971, "dur": 65.406, + "args": { + "External id": 980993,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 3584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938334540.878, "dur": 38.568, + "args": { + "External id": 980994,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938334588.250, "dur": 38.731, + "args": { + "External id": 980995,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938334635.959, "dur": 36.871, + "args": { + "External id": 980996,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938334680.820, "dur": 36.599, + "args": { + "External id": 980997,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345938334745.235, "dur": 26.494, + "args": { + "External id": 980998,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 3589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345938334790.768, "dur": 33.952, + "args": { + "External id": 980999,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345938334846.712, "dur": 23.049, + "args": { + "External id": 981000,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345938334883.484, "dur": 18.484, + "args": { + "External id": 981001,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938334912.202, "dur": 43.031, + "args": { + "External id": 981002,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938334959.028, "dur": 36.927, + "args": { + "External id": 981003,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345938335050.810, "dur": 370.513, + "args": { + "External id": 981004,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938335205.443, "dur": 10.516, + "args": { + "External id": 981005,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938335219.128, "dur": 3.090, + "args": { + "External id": 981006,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938335223.663, "dur": 2.172, + "args": { + "External id": 981007,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938335227.241, "dur": 2.194, + "args": { + "External id": 981008,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345938335291.604, "dur": 6.351, + "args": { + "External id": 981009,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938335294.060, "dur": 3.596, + "args": { + "External id": 981010,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345938335300.793, "dur": 43.343, + "args": { + "External id": 981011,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938335310.836, "dur": 4.654, + "args": { + "External id": 981012,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345938335346.202, "dur": 2.419, + "args": { + "External id": 981013,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938335347.646, "dur": 0.809, + "args": { + "External id": 981014,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345938335350.362, "dur": 16.921, + "args": { + "External id": 981015,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938335352.620, "dur": 0.598, + "args": { + "External id": 981016,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345938335464.184, "dur": 33.015, + "args": { + "External id": 981017,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345938335519.149, "dur": 18.275, + "args": { + "External id": 981018,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938335546.736, "dur": 61.799, + "args": { + "External id": 981019,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938335616.464, "dur": 46.922, + "args": { + "External id": 981020,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938335675.405, "dur": 26.326, + "args": { + "External id": 981021,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938335709.146, "dur": 35.831, + "args": { + "External id": 981022,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938335753.872, "dur": 34.444, + "args": { + "External id": 981023,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938335795.985, "dur": 35.453, + "args": { + "External id": 981024,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345938335851.866, "dur": 27.420, + "args": { + "External id": 981025,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 3616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345938335896.316, "dur": 26.054, + "args": { + "External id": 981026,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345938335937.390, "dur": 19.152, + "args": { + "External id": 981027,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345938335976.070, "dur": 16.270, + "args": { + "External id": 981028,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345938336006.088, "dur": 45.041, + "args": { + "External id": 981029,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 3620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938336180.055, "dur": 18.466, + "args": { + "External id": 981030,"Record function id": 0, "Ev Idx": 3621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938336183.788, "dur": 13.683, + "args": { + "External id": 981031,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938336188.988, "dur": 7.366, + "args": { + "External id": 981032,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938336191.275, "dur": 4.850, + "args": { + "External id": 981033,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938336203.195, "dur": 5.750, + "args": { + "External id": 981034,"Record function id": 0, "Ev Idx": 3625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938336204.448, "dur": 3.974, + "args": { + "External id": 981035,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938336205.500, "dur": 2.408, + "args": { + "External id": 981036,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938336206.531, "dur": 1.225, + "args": { + "External id": 981037,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938336212.805, "dur": 7.930, + "args": { + "External id": 981038,"Record function id": 0, "Ev Idx": 3629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938336214.309, "dur": 5.951, + "args": { + "External id": 981039,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938336215.061, "dur": 4.679, + "args": { + "External id": 981040,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938336215.865, "dur": 3.788, + "args": { + "External id": 981041,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938336224.523, "dur": 4.777, + "args": { + "External id": 981042,"Record function id": 0, "Ev Idx": 3633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938336225.850, "dur": 2.946, + "args": { + "External id": 981043,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938336226.649, "dur": 1.655, + "args": { + "External id": 981044,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938336227.299, "dur": 0.927, + "args": { + "External id": 981045,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938336233.155, "dur": 5.048, + "args": { + "External id": 981046,"Record function id": 0, "Ev Idx": 3637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938336234.428, "dur": 3.295, + "args": { + "External id": 981047,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938336235.431, "dur": 1.828, + "args": { + "External id": 981048,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938336236.327, "dur": 0.809, + "args": { + "External id": 981049,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938336242.051, "dur": 4.939, + "args": { + "External id": 981050,"Record function id": 0, "Ev Idx": 3641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938336243.295, "dur": 3.217, + "args": { + "External id": 981051,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938336244.254, "dur": 1.761, + "args": { + "External id": 981052,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938336245.079, "dur": 0.813, + "args": { + "External id": 981053,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938336250.953, "dur": 3.864, + "args": { + "External id": 981054,"Record function id": 0, "Ev Idx": 3645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938336252.054, "dur": 2.296, + "args": { + "External id": 981055,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938336252.658, "dur": 1.222, + "args": { + "External id": 981056,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938336253.008, "dur": 0.744, + "args": { + "External id": 981057,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938336258.484, "dur": 4.683, + "args": { + "External id": 981058,"Record function id": 0, "Ev Idx": 3649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938336259.945, "dur": 2.740, + "args": { + "External id": 981059,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938336260.692, "dur": 1.495, + "args": { + "External id": 981060,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938336261.400, "dur": 0.711, + "args": { + "External id": 981061,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938336266.783, "dur": 4.491, + "args": { + "External id": 981062,"Record function id": 0, "Ev Idx": 3653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938336268.060, "dur": 2.638, + "args": { + "External id": 981063,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938336268.639, "dur": 1.563, + "args": { + "External id": 981064,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938336269.298, "dur": 0.801, + "args": { + "External id": 981065,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938336275.720, "dur": 64089.402, + "args": { + "External id": 981066,"Record function id": 0, "Sequence number": 10552269, "Fwd thread id": 1, "Ev Idx": 3657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938336277.159, "dur": 64077.002, + "args": { + "External id": 981067,"Sequence number": 10552269, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3658 + } + }, + { + "ph": "f", "id": 197, "pid": 2338710, "tid": 2379450, "ts": 6345938336277.159, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.13)", "pid": 2338710, "tid": 2379450, + "ts": 6345938336313.215, "dur": 43.505, + "args": { + "External id": 981068,"Record function id": 0, "Ev Idx": 3659 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.13)", "pid": 2338710, "tid": 2379450, + "ts": 6345938336365.675, "dur": 68.500, + "args": { + "External id": 981069,"Record function id": 0, "Ev Idx": 3660 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.13)", "pid": 2338710, "tid": 2379450, + "ts": 6345938336440.711, "dur": 63902.515, + "args": { + "External id": 981070,"Record function id": 0, "Ev Idx": 3661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938336544.640, "dur": 7.990, + "args": { + "External id": 981071,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938336564.705, "dur": 7.270, + "args": { + "External id": 981072,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345938336590.655, "dur": 62698.945, + "args": { + "External id": 981073,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345938336606.783, "dur": 62666.105, + "args": { + "External id": 981074,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938336734.368, "dur": 22.798, + "args": { + "External id": 981075,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345938336781.781, "dur": 62433.320, + "args": { + "External id": 981076,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 3667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938336786.123, "dur": 62427.676, + "args": { + "External id": 981077,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 3668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938336792.131, "dur": 11.385, + "args": { + "External id": 981078,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938336806.762, "dur": 62400.389, + "args": { + "External id": 981079,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 3670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938399432.549, "dur": 16.483, + "args": { + "External id": 981080,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 3671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938399437.965, "dur": 10.493, + "args": { + "External id": 981081,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345938399489.299, "dur": 426.648, + "args": { + "External id": 981082,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 3673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938399527.392, "dur": 381.715, + "args": { + "External id": 981083,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3674, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345938399542.400, "dur": 359.506, + "args": { + "External id": 981084,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 3675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938399941.650, "dur": 2.831, + "args": { + "External id": 981085,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3676, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938400045.503, "dur": 42.957, + "args": { + "External id": 981086,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938400152.855, "dur": 2.241, + "args": { + "External id": 981087,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938400174.948, "dur": 4.882, + "args": { + "External id": 981088,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938400193.390, "dur": 1.599, + "args": { + "External id": 981089,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938400208.065, "dur": 1.318, + "args": { + "External id": 981090,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938400221.229, "dur": 1.320, + "args": { + "External id": 981091,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938400235.744, "dur": 3.500, + "args": { + "External id": 981092,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938400251.735, "dur": 2.529, + "args": { + "External id": 981093,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938400266.064, "dur": 1.010, + "args": { + "External id": 981094,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938400386.104, "dur": 3494.806, + "args": { + "External id": 981095,"Record function id": 0, "Ev Idx": 3686 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.12)", "pid": 2338710, "tid": 2379450, + "ts": 6345938400411.022, "dur": 1300.070, + "args": { + "External id": 981096,"Record function id": 0, "Ev Idx": 3687 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.12)", "pid": 2338710, "tid": 2379450, + "ts": 6345938400431.575, "dur": 424.441, + "args": { + "External id": 981097,"Record function id": 0, "Ev Idx": 3688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938400539.150, "dur": 6.439, + "args": { + "External id": 981098,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938400549.149, "dur": 1.437, + "args": { + "External id": 981099,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938400552.516, "dur": 3.085, + "args": { + "External id": 981100,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938400557.390, "dur": 1.221, + "args": { + "External id": 981101,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938400560.181, "dur": 0.941, + "args": { + "External id": 981102,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938400563.542, "dur": 0.876, + "args": { + "External id": 981103,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938400566.122, "dur": 2.350, + "args": { + "External id": 981104,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938400569.927, "dur": 0.838, + "args": { + "External id": 981105,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938400572.286, "dur": 0.954, + "args": { + "External id": 981106,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938400574.892, "dur": 0.897, + "args": { + "External id": 981107,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938400596.725, "dur": 219.869, + "args": { + "External id": 981108,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938400641.570, "dur": 168.100, + "args": { + "External id": 981109,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938400660.303, "dur": 20.695, + "args": { + "External id": 981110,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345938400686.909, "dur": 86.353, + "args": { + "External id": 981111,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 3702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938400690.038, "dur": 82.548, + "args": { + "External id": 981112,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 3703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938400695.902, "dur": 7.636, + "args": { + "External id": 981113,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938400705.881, "dur": 65.983, + "args": { + "External id": 981114,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 3705 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.11", "pid": 2338710, "tid": 2379450, + "ts": 6345938400946.684, "dur": 755.211, + "args": { + "External id": 981115,"Record function id": 0, "Ev Idx": 3706 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.11)", "pid": 2338710, "tid": 2379450, + "ts": 6345938400968.241, "dur": 718.805, + "args": { + "External id": 981116,"Record function id": 0, "Ev Idx": 3707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938401094.213, "dur": 9.231, + "args": { + "External id": 981117,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345938401124.650, "dur": 36.941, + "args": { + "External id": 981118,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938401130.971, "dur": 2.211, + "args": { + "External id": 981119,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938401135.954, "dur": 0.703, + "args": { + "External id": 981120,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938401138.469, "dur": 0.581, + "args": { + "External id": 981121,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938401140.710, "dur": 0.436, + "args": { + "External id": 981122,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938401142.817, "dur": 0.834, + "args": { + "External id": 981123,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938401145.641, "dur": 3.120, + "args": { + "External id": 981124,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938401150.694, "dur": 0.458, + "args": { + "External id": 981125,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938401152.635, "dur": 0.390, + "args": { + "External id": 981126,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938401154.932, "dur": 0.475, + "args": { + "External id": 981127,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938401173.214, "dur": 55.964, + "args": { + "External id": 981128,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345938401275.356, "dur": 136.070, + "args": { + "External id": 981129,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 3720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938401289.425, "dur": 4.480, + "args": { + "External id": 981130,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345938401301.022, "dur": 12.142, + "args": { + "External id": 981131,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345938401306.246, "dur": 6.426, + "args": { + "External id": 981132,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 3723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938401310.419, "dur": 0.710, + "args": { + "External id": 981133,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345938401321.092, "dur": 29.616, + "args": { + "External id": 981134,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938401323.818, "dur": 0.681, + "args": { + "External id": 981135,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938401326.705, "dur": 0.423, + "args": { + "External id": 981136,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938401328.971, "dur": 2.805, + "args": { + "External id": 981137,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938401333.951, "dur": 0.436, + "args": { + "External id": 981138,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938401336.186, "dur": 0.479, + "args": { + "External id": 981139,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938401338.583, "dur": 0.587, + "args": { + "External id": 981140,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938401341.141, "dur": 0.601, + "args": { + "External id": 981141,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938401343.521, "dur": 0.452, + "args": { + "External id": 981142,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938401345.798, "dur": 0.409, + "args": { + "External id": 981143,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938401364.493, "dur": 37.774, + "args": { + "External id": 981144,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345938401467.523, "dur": 133.973, + "args": { + "External id": 981145,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 3736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938401494.053, "dur": 103.177, + "args": { + "External id": 981146,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3737, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345938401505.255, "dur": 87.324, + "args": { + "External id": 981147,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 3738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938401622.355, "dur": 2.132, + "args": { + "External id": 981148,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3739, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938401719.805, "dur": 2138.042, + "args": { + "External id": 981149,"Sequence number": 10552268, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3740 + } + }, + { + "ph": "f", "id": 198, "pid": 2338710, "tid": 2379450, "ts": 6345938401719.805, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938401849.038, "dur": 128.987, + "args": { + "External id": 981150,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 3741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345938402046.070, "dur": 110.004, + "args": { + "External id": 981151,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 3742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345938402184.843, "dur": 75.341, + "args": { + "External id": 981152,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 3743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938402275.007, "dur": 39.520, + "args": { + "External id": 981153,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938402323.631, "dur": 40.205, + "args": { + "External id": 981154,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938402372.908, "dur": 34.163, + "args": { + "External id": 981155,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938402414.516, "dur": 34.959, + "args": { + "External id": 981156,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345938402482.382, "dur": 26.346, + "args": { + "External id": 981157,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 3748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345938402528.981, "dur": 32.666, + "args": { + "External id": 981158,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345938402583.813, "dur": 24.189, + "args": { + "External id": 981159,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345938402621.758, "dur": 18.209, + "args": { + "External id": 981160,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938402650.666, "dur": 46.220, + "args": { + "External id": 981161,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938402701.213, "dur": 38.416, + "args": { + "External id": 981162,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345938402786.249, "dur": 402.320, + "args": { + "External id": 981163,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938402893.708, "dur": 9.646, + "args": { + "External id": 981164,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938402905.815, "dur": 2.972, + "args": { + "External id": 981165,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938402910.193, "dur": 2.248, + "args": { + "External id": 981166,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938402913.595, "dur": 2.405, + "args": { + "External id": 981167,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345938402974.690, "dur": 6.188, + "args": { + "External id": 981168,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938402977.183, "dur": 3.446, + "args": { + "External id": 981169,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345938402983.380, "dur": 109.637, + "args": { + "External id": 981170,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938402989.256, "dur": 7.767, + "args": { + "External id": 981171,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345938403098.318, "dur": 2.793, + "args": { + "External id": 981172,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938403099.783, "dur": 1.224, + "args": { + "External id": 981173,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345938403102.702, "dur": 22.642, + "args": { + "External id": 981174,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938403105.194, "dur": 0.716, + "args": { + "External id": 981175,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345938403232.864, "dur": 38.830, + "args": { + "External id": 981176,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345938403294.603, "dur": 20.527, + "args": { + "External id": 981177,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938403325.668, "dur": 68.356, + "args": { + "External id": 981178,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938403402.628, "dur": 53.274, + "args": { + "External id": 981179,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938403469.202, "dur": 26.249, + "args": { + "External id": 981180,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938403502.889, "dur": 37.892, + "args": { + "External id": 981181,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938403549.590, "dur": 33.708, + "args": { + "External id": 981182,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938403591.435, "dur": 34.617, + "args": { + "External id": 981183,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345938403648.027, "dur": 27.936, + "args": { + "External id": 981184,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 3775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345938403693.592, "dur": 28.009, + "args": { + "External id": 981185,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345938403736.322, "dur": 19.895, + "args": { + "External id": 981186,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345938403774.987, "dur": 16.366, + "args": { + "External id": 981187,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345938403805.439, "dur": 17.728, + "args": { + "External id": 981188,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 3779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938403906.739, "dur": 17.305, + "args": { + "External id": 981189,"Record function id": 0, "Ev Idx": 3780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938403910.449, "dur": 12.419, + "args": { + "External id": 981190,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938403915.046, "dur": 6.811, + "args": { + "External id": 981191,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938403917.145, "dur": 4.535, + "args": { + "External id": 981192,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938403928.813, "dur": 5.706, + "args": { + "External id": 981193,"Record function id": 0, "Ev Idx": 3784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938403930.347, "dur": 3.625, + "args": { + "External id": 981194,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938403931.272, "dur": 2.180, + "args": { + "External id": 981195,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938403932.063, "dur": 1.256, + "args": { + "External id": 981196,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938403938.511, "dur": 6.975, + "args": { + "External id": 981197,"Record function id": 0, "Ev Idx": 3788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938403939.725, "dur": 5.268, + "args": { + "External id": 981198,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938403940.453, "dur": 4.051, + "args": { + "External id": 981199,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938403940.864, "dur": 3.548, + "args": { + "External id": 981200,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938403949.240, "dur": 4.022, + "args": { + "External id": 981201,"Record function id": 0, "Ev Idx": 3792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938403950.412, "dur": 2.364, + "args": { + "External id": 981202,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938403951.053, "dur": 1.277, + "args": { + "External id": 981203,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938403951.419, "dur": 0.833, + "args": { + "External id": 981204,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938403956.952, "dur": 4.172, + "args": { + "External id": 981205,"Record function id": 0, "Ev Idx": 3796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938403958.241, "dur": 2.364, + "args": { + "External id": 981206,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938403959.058, "dur": 1.102, + "args": { + "External id": 981207,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938403959.408, "dur": 0.639, + "args": { + "External id": 981208,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938403964.932, "dur": 4.467, + "args": { + "External id": 981209,"Record function id": 0, "Ev Idx": 3800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938403966.273, "dur": 2.608, + "args": { + "External id": 981210,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938403966.819, "dur": 1.617, + "args": { + "External id": 981211,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938403967.406, "dur": 0.907, + "args": { + "External id": 981212,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938403973.284, "dur": 3.955, + "args": { + "External id": 981213,"Record function id": 0, "Ev Idx": 3804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938403974.403, "dur": 2.363, + "args": { + "External id": 981214,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938403974.960, "dur": 1.365, + "args": { + "External id": 981215,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938403975.474, "dur": 0.719, + "args": { + "External id": 981216,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938403980.913, "dur": 4.579, + "args": { + "External id": 981217,"Record function id": 0, "Ev Idx": 3808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938403982.049, "dur": 2.959, + "args": { + "External id": 981218,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938403982.995, "dur": 1.564, + "args": { + "External id": 981219,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938403983.713, "dur": 0.771, + "args": { + "External id": 981220,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938403989.212, "dur": 4.217, + "args": { + "External id": 981221,"Record function id": 0, "Ev Idx": 3812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938403990.220, "dur": 2.687, + "args": { + "External id": 981222,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938403991.166, "dur": 1.301, + "args": { + "External id": 981223,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938403991.670, "dur": 0.691, + "args": { + "External id": 981224,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938403997.830, "dur": 66564.393, + "args": { + "External id": 981225,"Record function id": 0, "Sequence number": 10552267, "Fwd thread id": 1, "Ev Idx": 3816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938403999.296, "dur": 66551.065, + "args": { + "External id": 981226,"Sequence number": 10552267, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3817 + } + }, + { + "ph": "f", "id": 199, "pid": 2338710, "tid": 2379450, "ts": 6345938403999.296, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.12)", "pid": 2338710, "tid": 2379450, + "ts": 6345938404097.453, "dur": 48.854, + "args": { + "External id": 981227,"Record function id": 0, "Ev Idx": 3818 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.12)", "pid": 2338710, "tid": 2379450, + "ts": 6345938404156.545, "dur": 74.070, + "args": { + "External id": 981228,"Record function id": 0, "Ev Idx": 3819 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.12)", "pid": 2338710, "tid": 2379450, + "ts": 6345938404237.467, "dur": 66302.815, + "args": { + "External id": 981229,"Record function id": 0, "Ev Idx": 3820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938404345.612, "dur": 9.155, + "args": { + "External id": 981230,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938404367.267, "dur": 7.915, + "args": { + "External id": 981231,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345938404392.875, "dur": 65022.879, + "args": { + "External id": 981232,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345938404408.817, "dur": 64989.859, + "args": { + "External id": 981233,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938404530.788, "dur": 22.526, + "args": { + "External id": 981234,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345938404578.512, "dur": 64763.920, + "args": { + "External id": 981235,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 3826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938404583.672, "dur": 64757.461, + "args": { + "External id": 981236,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 3827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938404589.181, "dur": 12.060, + "args": { + "External id": 981237,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938404604.568, "dur": 64729.551, + "args": { + "External id": 981238,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 3829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938469557.859, "dur": 16.822, + "args": { + "External id": 981239,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 3830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938469563.174, "dur": 10.790, + "args": { + "External id": 981240,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345938469616.434, "dur": 559.015, + "args": { + "External id": 981241,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 3832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938469654.287, "dur": 513.408, + "args": { + "External id": 981242,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3833, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345938469670.346, "dur": 488.919, + "args": { + "External id": 981243,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 3834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938470203.284, "dur": 2.682, + "args": { + "External id": 981244,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3835, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938470288.442, "dur": 9.088, + "args": { + "External id": 981245,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938470355.994, "dur": 2.740, + "args": { + "External id": 981246,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938470377.414, "dur": 4.418, + "args": { + "External id": 981247,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938470395.219, "dur": 1.150, + "args": { + "External id": 981248,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938470409.590, "dur": 0.903, + "args": { + "External id": 981249,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938470422.554, "dur": 1.164, + "args": { + "External id": 981250,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938470435.059, "dur": 3.141, + "args": { + "External id": 981251,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938470450.332, "dur": 2.625, + "args": { + "External id": 981252,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938470465.448, "dur": 1.157, + "args": { + "External id": 981253,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938470580.496, "dur": 3379.146, + "args": { + "External id": 981254,"Record function id": 0, "Ev Idx": 3845 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.11)", "pid": 2338710, "tid": 2379450, + "ts": 6345938470603.503, "dur": 1287.741, + "args": { + "External id": 981255,"Record function id": 0, "Ev Idx": 3846 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.11)", "pid": 2338710, "tid": 2379450, + "ts": 6345938470621.038, "dur": 421.757, + "args": { + "External id": 981256,"Record function id": 0, "Ev Idx": 3847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938470713.260, "dur": 6.485, + "args": { + "External id": 981257,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938470723.129, "dur": 1.188, + "args": { + "External id": 981258,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938470726.363, "dur": 3.509, + "args": { + "External id": 981259,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938470731.703, "dur": 1.416, + "args": { + "External id": 981260,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938470734.802, "dur": 1.141, + "args": { + "External id": 981261,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938470737.562, "dur": 1.029, + "args": { + "External id": 981262,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938470740.474, "dur": 2.626, + "args": { + "External id": 981263,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938470744.509, "dur": 1.152, + "args": { + "External id": 981264,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938470747.117, "dur": 0.693, + "args": { + "External id": 981265,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938470749.202, "dur": 0.970, + "args": { + "External id": 981266,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938470770.517, "dur": 206.828, + "args": { + "External id": 981267,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938470790.297, "dur": 180.685, + "args": { + "External id": 981268,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938470816.583, "dur": 20.288, + "args": { + "External id": 981269,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345938470842.659, "dur": 90.771, + "args": { + "External id": 981270,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 3861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938470846.062, "dur": 86.927, + "args": { + "External id": 981271,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 3862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938470851.853, "dur": 7.995, + "args": { + "External id": 981272,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938470862.538, "dur": 69.632, + "args": { + "External id": 981273,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 3864 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.10", "pid": 2338710, "tid": 2379450, + "ts": 6345938471187.949, "dur": 694.725, + "args": { + "External id": 981274,"Record function id": 0, "Ev Idx": 3865 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.10)", "pid": 2338710, "tid": 2379450, + "ts": 6345938471210.206, "dur": 658.572, + "args": { + "External id": 981275,"Record function id": 0, "Ev Idx": 3866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938471279.268, "dur": 8.793, + "args": { + "External id": 981276,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345938471306.076, "dur": 35.137, + "args": { + "External id": 981277,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938471312.346, "dur": 1.916, + "args": { + "External id": 981278,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938471316.984, "dur": 0.607, + "args": { + "External id": 981279,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938471319.337, "dur": 0.592, + "args": { + "External id": 981280,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938471321.794, "dur": 0.752, + "args": { + "External id": 981281,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938471324.454, "dur": 0.498, + "args": { + "External id": 981282,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938471326.606, "dur": 2.615, + "args": { + "External id": 981283,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938471330.828, "dur": 0.547, + "args": { + "External id": 981284,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938471333.154, "dur": 0.493, + "args": { + "External id": 981285,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938471335.358, "dur": 0.425, + "args": { + "External id": 981286,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938471352.287, "dur": 70.093, + "args": { + "External id": 981287,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345938471461.525, "dur": 134.285, + "args": { + "External id": 981288,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 3879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938471473.923, "dur": 4.429, + "args": { + "External id": 981289,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345938471488.028, "dur": 12.148, + "args": { + "External id": 981290,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345938471493.050, "dur": 6.622, + "args": { + "External id": 981291,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 3882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938471497.389, "dur": 0.611, + "args": { + "External id": 981292,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345938471507.891, "dur": 33.650, + "args": { + "External id": 981293,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938471510.574, "dur": 0.893, + "args": { + "External id": 981294,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938471513.437, "dur": 0.340, + "args": { + "External id": 981295,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938471522.312, "dur": 2.295, + "args": { + "External id": 981296,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938471526.064, "dur": 0.570, + "args": { + "External id": 981297,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938471528.273, "dur": 0.496, + "args": { + "External id": 981298,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938471530.431, "dur": 0.534, + "args": { + "External id": 981299,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938471532.389, "dur": 0.505, + "args": { + "External id": 981300,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938471534.432, "dur": 0.442, + "args": { + "External id": 981301,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938471536.721, "dur": 0.518, + "args": { + "External id": 981302,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938471551.666, "dur": 35.740, + "args": { + "External id": 981303,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345938471646.756, "dur": 140.543, + "args": { + "External id": 981304,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 3895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938471677.674, "dur": 105.473, + "args": { + "External id": 981305,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3896, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345938471688.563, "dur": 89.476, + "args": { + "External id": 981306,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 3897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938471807.964, "dur": 2.018, + "args": { + "External id": 981307,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3898, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938471899.490, "dur": 2038.205, + "args": { + "External id": 981308,"Sequence number": 10552266, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3899 + } + }, + { + "ph": "f", "id": 200, "pid": 2338710, "tid": 2379450, "ts": 6345938471899.490, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938472091.875, "dur": 135.193, + "args": { + "External id": 981309,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 3900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345938472280.499, "dur": 49.382, + "args": { + "External id": 981310,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 3901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345938472349.710, "dur": 58.839, + "args": { + "External id": 981311,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 3902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938472421.808, "dur": 36.120, + "args": { + "External id": 981312,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938472465.265, "dur": 35.556, + "args": { + "External id": 981313,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938472508.003, "dur": 29.870, + "args": { + "External id": 981314,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938472543.865, "dur": 31.235, + "args": { + "External id": 981315,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345938472607.270, "dur": 24.989, + "args": { + "External id": 981316,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 3907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345938472654.939, "dur": 29.836, + "args": { + "External id": 981317,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345938472712.025, "dur": 21.738, + "args": { + "External id": 981318,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345938472749.543, "dur": 21.199, + "args": { + "External id": 981319,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938472780.811, "dur": 42.047, + "args": { + "External id": 981320,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938472827.276, "dur": 37.082, + "args": { + "External id": 981321,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345938472898.693, "dur": 394.968, + "args": { + "External id": 981322,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938472993.123, "dur": 6.698, + "args": { + "External id": 981323,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938473002.075, "dur": 2.969, + "args": { + "External id": 981324,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938473006.315, "dur": 20.206, + "args": { + "External id": 981325,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938473029.083, "dur": 2.717, + "args": { + "External id": 981326,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345938473147.444, "dur": 8.195, + "args": { + "External id": 981327,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938473149.959, "dur": 4.502, + "args": { + "External id": 981328,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345938473162.180, "dur": 40.338, + "args": { + "External id": 981329,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938473169.715, "dur": 3.977, + "args": { + "External id": 981330,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345938473204.442, "dur": 2.034, + "args": { + "External id": 981331,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938473205.673, "dur": 0.726, + "args": { + "External id": 981332,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345938473207.990, "dur": 15.623, + "args": { + "External id": 981333,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938473210.184, "dur": 0.749, + "args": { + "External id": 981334,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345938473339.632, "dur": 37.271, + "args": { + "External id": 981335,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345938473396.021, "dur": 19.302, + "args": { + "External id": 981336,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938473424.167, "dur": 62.249, + "args": { + "External id": 981337,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938473493.223, "dur": 45.342, + "args": { + "External id": 981338,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938473550.805, "dur": 26.702, + "args": { + "External id": 981339,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938473585.226, "dur": 37.390, + "args": { + "External id": 981340,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938473631.701, "dur": 32.782, + "args": { + "External id": 981341,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938473675.699, "dur": 33.956, + "args": { + "External id": 981342,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345938473729.385, "dur": 30.027, + "args": { + "External id": 981343,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 3934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345938473774.595, "dur": 29.005, + "args": { + "External id": 981344,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345938473819.496, "dur": 19.730, + "args": { + "External id": 981345,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345938473856.740, "dur": 16.031, + "args": { + "External id": 981346,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345938473888.366, "dur": 17.136, + "args": { + "External id": 981347,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 3938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938473985.759, "dur": 16.861, + "args": { + "External id": 981348,"Record function id": 0, "Ev Idx": 3939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938473989.253, "dur": 12.195, + "args": { + "External id": 981349,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938473994.089, "dur": 6.295, + "args": { + "External id": 981350,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938473995.810, "dur": 4.460, + "args": { + "External id": 981351,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938474007.057, "dur": 29.343, + "args": { + "External id": 981352,"Record function id": 0, "Ev Idx": 3943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938474029.774, "dur": 5.634, + "args": { + "External id": 981353,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938474031.413, "dur": 2.962, + "args": { + "External id": 981354,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938474032.247, "dur": 1.794, + "args": { + "External id": 981355,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938474042.233, "dur": 7.691, + "args": { + "External id": 981356,"Record function id": 0, "Ev Idx": 3947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938474043.987, "dur": 5.376, + "args": { + "External id": 981357,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938474044.607, "dur": 4.288, + "args": { + "External id": 981358,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938474045.116, "dur": 3.691, + "args": { + "External id": 981359,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938474095.479, "dur": 9.629, + "args": { + "External id": 981360,"Record function id": 0, "Ev Idx": 3951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938474099.298, "dur": 4.953, + "args": { + "External id": 981361,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938474100.625, "dur": 2.621, + "args": { + "External id": 981362,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938474101.640, "dur": 1.390, + "args": { + "External id": 981363,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938474109.059, "dur": 4.993, + "args": { + "External id": 981364,"Record function id": 0, "Ev Idx": 3955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938474110.760, "dur": 2.790, + "args": { + "External id": 981365,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938474111.466, "dur": 1.613, + "args": { + "External id": 981366,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938474112.114, "dur": 0.825, + "args": { + "External id": 981367,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938474117.720, "dur": 4.512, + "args": { + "External id": 981368,"Record function id": 0, "Ev Idx": 3959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938474119.077, "dur": 2.670, + "args": { + "External id": 981369,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938474119.718, "dur": 1.477, + "args": { + "External id": 981370,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938474120.058, "dur": 0.991, + "args": { + "External id": 981371,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938474126.307, "dur": 13.321, + "args": { + "External id": 981372,"Record function id": 0, "Ev Idx": 3963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938474127.510, "dur": 11.595, + "args": { + "External id": 981373,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938474128.272, "dur": 10.352, + "args": { + "External id": 981374,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938474137.701, "dur": 0.786, + "args": { + "External id": 981375,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938474143.509, "dur": 6.793, + "args": { + "External id": 981376,"Record function id": 0, "Ev Idx": 3967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938474144.668, "dur": 5.126, + "args": { + "External id": 981377,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938474145.276, "dur": 4.007, + "args": { + "External id": 981378,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938474148.390, "dur": 0.750, + "args": { + "External id": 981379,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938474154.007, "dur": 4.291, + "args": { + "External id": 981380,"Record function id": 0, "Ev Idx": 3971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938474155.288, "dur": 2.531, + "args": { + "External id": 981381,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938474155.940, "dur": 1.398, + "args": { + "External id": 981382,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938474156.500, "dur": 0.733, + "args": { + "External id": 981383,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938474162.791, "dur": 64294.618, + "args": { + "External id": 981384,"Record function id": 0, "Sequence number": 10552265, "Fwd thread id": 1, "Ev Idx": 3975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938474164.238, "dur": 64282.210, + "args": { + "External id": 981385,"Sequence number": 10552265, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3976 + } + }, + { + "ph": "f", "id": 201, "pid": 2338710, "tid": 2379450, "ts": 6345938474164.238, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.11)", "pid": 2338710, "tid": 2379450, + "ts": 6345938474203.061, "dur": 44.596, + "args": { + "External id": 981386,"Record function id": 0, "Ev Idx": 3977 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.11)", "pid": 2338710, "tid": 2379450, + "ts": 6345938474256.816, "dur": 71.957, + "args": { + "External id": 981387,"Record function id": 0, "Ev Idx": 3978 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.11)", "pid": 2338710, "tid": 2379450, + "ts": 6345938474335.448, "dur": 64100.541, + "args": { + "External id": 981388,"Record function id": 0, "Ev Idx": 3979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938474449.028, "dur": 9.080, + "args": { + "External id": 981389,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938474469.643, "dur": 7.666, + "args": { + "External id": 981390,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345938474495.965, "dur": 62994.249, + "args": { + "External id": 981391,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345938474511.614, "dur": 62962.191, + "args": { + "External id": 981392,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938474616.572, "dur": 22.265, + "args": { + "External id": 981393,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345938474662.194, "dur": 62751.534, + "args": { + "External id": 981394,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 3985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938474666.208, "dur": 62746.037, + "args": { + "External id": 981395,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 3986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938474672.041, "dur": 12.018, + "args": { + "External id": 981396,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938474688.733, "dur": 62716.404, + "args": { + "External id": 981397,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 3988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938537616.432, "dur": 14.464, + "args": { + "External id": 981398,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 3989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938537620.954, "dur": 9.515, + "args": { + "External id": 981399,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345938537669.499, "dur": 377.650, + "args": { + "External id": 981400,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 3991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938537708.904, "dur": 331.831, + "args": { + "External id": 981401,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3992, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345938537722.168, "dur": 310.301, + "args": { + "External id": 981402,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 3993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938538111.772, "dur": 3.972, + "args": { + "External id": 981403,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3994, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938538191.335, "dur": 8.134, + "args": { + "External id": 981404,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938538254.908, "dur": 1.514, + "args": { + "External id": 981405,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938538275.415, "dur": 4.663, + "args": { + "External id": 981406,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938538292.880, "dur": 0.956, + "args": { + "External id": 981407,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938538307.042, "dur": 0.843, + "args": { + "External id": 981408,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938538319.517, "dur": 1.023, + "args": { + "External id": 981409,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938538333.886, "dur": 4.482, + "args": { + "External id": 981410,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938538349.557, "dur": 2.670, + "args": { + "External id": 981411,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938538363.396, "dur": 0.770, + "args": { + "External id": 981412,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938538477.725, "dur": 3414.084, + "args": { + "External id": 981413,"Record function id": 0, "Ev Idx": 4004 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.10)", "pid": 2338710, "tid": 2379450, + "ts": 6345938538500.738, "dur": 1274.507, + "args": { + "External id": 981414,"Record function id": 0, "Ev Idx": 4005 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.10)", "pid": 2338710, "tid": 2379450, + "ts": 6345938538516.465, "dur": 392.412, + "args": { + "External id": 981415,"Record function id": 0, "Ev Idx": 4006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938538618.708, "dur": 6.337, + "args": { + "External id": 981416,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938538629.054, "dur": 0.922, + "args": { + "External id": 981417,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938538632.263, "dur": 3.309, + "args": { + "External id": 981418,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938538637.649, "dur": 0.956, + "args": { + "External id": 981419,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938538640.269, "dur": 0.845, + "args": { + "External id": 981420,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938538644.879, "dur": 0.731, + "args": { + "External id": 981421,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938538647.465, "dur": 2.175, + "args": { + "External id": 981422,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938538651.496, "dur": 0.833, + "args": { + "External id": 981423,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938538654.273, "dur": 0.823, + "args": { + "External id": 981424,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938538658.297, "dur": 0.862, + "args": { + "External id": 981425,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938538681.346, "dur": 191.292, + "args": { + "External id": 981426,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938538701.801, "dur": 164.168, + "args": { + "External id": 981427,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938538722.142, "dur": 19.401, + "args": { + "External id": 981428,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345938538746.643, "dur": 81.822, + "args": { + "External id": 981429,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 4020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938538749.682, "dur": 78.352, + "args": { + "External id": 981430,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 4021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938538754.341, "dur": 7.268, + "args": { + "External id": 981431,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938538763.634, "dur": 63.647, + "args": { + "External id": 981432,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 4023 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.9", "pid": 2338710, "tid": 2379450, + "ts": 6345938538995.536, "dur": 770.152, + "args": { + "External id": 981433,"Record function id": 0, "Ev Idx": 4024 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.9)", "pid": 2338710, "tid": 2379450, + "ts": 6345938539037.812, "dur": 714.264, + "args": { + "External id": 981434,"Record function id": 0, "Ev Idx": 4025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938539150.628, "dur": 9.449, + "args": { + "External id": 981435,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345938539179.292, "dur": 41.145, + "args": { + "External id": 981436,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938539186.712, "dur": 1.953, + "args": { + "External id": 981437,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938539192.466, "dur": 2.166, + "args": { + "External id": 981438,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938539196.744, "dur": 0.308, + "args": { + "External id": 981439,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938539198.722, "dur": 0.466, + "args": { + "External id": 981440,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938539202.248, "dur": 0.519, + "args": { + "External id": 981441,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938539204.717, "dur": 2.852, + "args": { + "External id": 981442,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938539209.012, "dur": 0.440, + "args": { + "External id": 981443,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938539211.991, "dur": 0.361, + "args": { + "External id": 981444,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938539214.352, "dur": 0.458, + "args": { + "External id": 981445,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938539234.322, "dur": 58.303, + "args": { + "External id": 981446,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345938539331.053, "dur": 139.738, + "args": { + "External id": 981447,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 4038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938539344.248, "dur": 4.638, + "args": { + "External id": 981448,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345938539354.706, "dur": 11.431, + "args": { + "External id": 981449,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345938539359.171, "dur": 6.490, + "args": { + "External id": 981450,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 4041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938539363.797, "dur": 0.566, + "args": { + "External id": 981451,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345938539374.171, "dur": 28.732, + "args": { + "External id": 981452,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938539377.327, "dur": 0.493, + "args": { + "External id": 981453,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938539380.040, "dur": 0.363, + "args": { + "External id": 981454,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938539382.035, "dur": 2.801, + "args": { + "External id": 981455,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938539386.875, "dur": 0.593, + "args": { + "External id": 981456,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938539389.161, "dur": 0.502, + "args": { + "External id": 981457,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938539391.151, "dur": 0.439, + "args": { + "External id": 981458,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938539393.591, "dur": 0.498, + "args": { + "External id": 981459,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938539395.666, "dur": 0.418, + "args": { + "External id": 981460,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938539397.939, "dur": 0.255, + "args": { + "External id": 981461,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938539422.409, "dur": 39.548, + "args": { + "External id": 981462,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345938539524.321, "dur": 147.569, + "args": { + "External id": 981463,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 4054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938539562.231, "dur": 105.578, + "args": { + "External id": 981464,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4055, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345938539574.046, "dur": 88.575, + "args": { + "External id": 981465,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 4056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938539691.761, "dur": 2.028, + "args": { + "External id": 981466,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4057, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938539783.921, "dur": 2085.113, + "args": { + "External id": 981467,"Sequence number": 10552264, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4058 + } + }, + { + "ph": "f", "id": 202, "pid": 2338710, "tid": 2379450, "ts": 6345938539783.921, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938539910.071, "dur": 196.323, + "args": { + "External id": 981468,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 4059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345938540163.506, "dur": 49.465, + "args": { + "External id": 981469,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 4060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345938540232.841, "dur": 67.734, + "args": { + "External id": 981470,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 4061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938540315.298, "dur": 36.487, + "args": { + "External id": 981471,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938540358.866, "dur": 36.799, + "args": { + "External id": 981472,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938540402.741, "dur": 30.487, + "args": { + "External id": 981473,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938540441.443, "dur": 31.234, + "args": { + "External id": 981474,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345938540503.303, "dur": 28.541, + "args": { + "External id": 981475,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 4066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345938540551.840, "dur": 30.190, + "args": { + "External id": 981476,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345938540612.106, "dur": 21.489, + "args": { + "External id": 981477,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345938540647.566, "dur": 17.246, + "args": { + "External id": 981478,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938540673.997, "dur": 46.755, + "args": { + "External id": 981479,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938540724.455, "dur": 39.835, + "args": { + "External id": 981480,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345938540799.737, "dur": 365.808, + "args": { + "External id": 981481,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938540887.444, "dur": 6.753, + "args": { + "External id": 981482,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938540896.357, "dur": 4.615, + "args": { + "External id": 981483,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938540902.584, "dur": 2.292, + "args": { + "External id": 981484,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938540906.475, "dur": 1.883, + "args": { + "External id": 981485,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345938540956.583, "dur": 5.768, + "args": { + "External id": 981486,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938540958.798, "dur": 3.308, + "args": { + "External id": 981487,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345938540964.585, "dur": 38.186, + "args": { + "External id": 981488,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938540971.871, "dur": 3.991, + "args": { + "External id": 981489,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345938541004.598, "dur": 2.002, + "args": { + "External id": 981490,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938541005.782, "dur": 0.683, + "args": { + "External id": 981491,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345938541029.972, "dur": 63.515, + "args": { + "External id": 981492,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938541034.818, "dur": 2.278, + "args": { + "External id": 981493,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345938541219.557, "dur": 44.592, + "args": { + "External id": 981494,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345938541286.923, "dur": 21.981, + "args": { + "External id": 981495,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938541317.828, "dur": 60.923, + "args": { + "External id": 981496,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938541385.126, "dur": 44.846, + "args": { + "External id": 981497,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938541441.274, "dur": 25.079, + "args": { + "External id": 981498,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938541473.239, "dur": 34.467, + "args": { + "External id": 981499,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938541515.028, "dur": 31.731, + "args": { + "External id": 981500,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938541553.013, "dur": 31.960, + "args": { + "External id": 981501,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345938541607.810, "dur": 27.350, + "args": { + "External id": 981502,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 4093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345938541655.048, "dur": 28.667, + "args": { + "External id": 981503,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345938541701.575, "dur": 20.485, + "args": { + "External id": 981504,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345938541783.307, "dur": 17.039, + "args": { + "External id": 981505,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345938541818.045, "dur": 17.509, + "args": { + "External id": 981506,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 4097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938541918.522, "dur": 16.961, + "args": { + "External id": 981507,"Record function id": 0, "Ev Idx": 4098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938541921.834, "dur": 12.463, + "args": { + "External id": 981508,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938541926.752, "dur": 6.359, + "args": { + "External id": 981509,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938541928.317, "dur": 4.693, + "args": { + "External id": 981510,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938541940.182, "dur": 5.164, + "args": { + "External id": 981511,"Record function id": 0, "Ev Idx": 4102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938541941.697, "dur": 3.088, + "args": { + "External id": 981512,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938541942.812, "dur": 1.485, + "args": { + "External id": 981513,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938541943.237, "dur": 0.947, + "args": { + "External id": 981514,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938541949.256, "dur": 6.939, + "args": { + "External id": 981515,"Record function id": 0, "Ev Idx": 4106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938541950.726, "dur": 4.952, + "args": { + "External id": 981516,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938541951.463, "dur": 3.730, + "args": { + "External id": 981517,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938541952.137, "dur": 2.919, + "args": { + "External id": 981518,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938541959.960, "dur": 4.406, + "args": { + "External id": 981519,"Record function id": 0, "Ev Idx": 4110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938541961.503, "dur": 2.404, + "args": { + "External id": 981520,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938541962.092, "dur": 1.349, + "args": { + "External id": 981521,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938541962.465, "dur": 0.858, + "args": { + "External id": 981522,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938541968.084, "dur": 4.476, + "args": { + "External id": 981523,"Record function id": 0, "Ev Idx": 4114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938541969.580, "dur": 2.488, + "args": { + "External id": 981524,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938541970.131, "dur": 1.441, + "args": { + "External id": 981525,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938541970.632, "dur": 0.826, + "args": { + "External id": 981526,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938541976.237, "dur": 4.456, + "args": { + "External id": 981527,"Record function id": 0, "Ev Idx": 4118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938541977.601, "dur": 2.621, + "args": { + "External id": 981528,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938541978.143, "dur": 1.589, + "args": { + "External id": 981529,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938541978.761, "dur": 0.861, + "args": { + "External id": 981530,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938541984.696, "dur": 4.395, + "args": { + "External id": 981531,"Record function id": 0, "Ev Idx": 4122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938541986.272, "dur": 2.245, + "args": { + "External id": 981532,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938541986.867, "dur": 1.120, + "args": { + "External id": 981533,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938541987.181, "dur": 0.693, + "args": { + "External id": 981534,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938541992.803, "dur": 6.787, + "args": { + "External id": 981535,"Record function id": 0, "Ev Idx": 4126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938541994.033, "dur": 5.088, + "args": { + "External id": 981536,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938541994.643, "dur": 4.018, + "args": { + "External id": 981537,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938541997.921, "dur": 0.634, + "args": { + "External id": 981538,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938542003.312, "dur": 23.553, + "args": { + "External id": 981539,"Record function id": 0, "Ev Idx": 4130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938542004.621, "dur": 2.470, + "args": { + "External id": 981540,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938542005.398, "dur": 1.226, + "args": { + "External id": 981541,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938542005.744, "dur": 0.762, + "args": { + "External id": 981542,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938542034.697, "dur": 64880.345, + "args": { + "External id": 981543,"Record function id": 0, "Sequence number": 10552263, "Fwd thread id": 1, "Ev Idx": 4134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938542037.622, "dur": 64867.562, + "args": { + "External id": 981544,"Sequence number": 10552263, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4135 + } + }, + { + "ph": "f", "id": 203, "pid": 2338710, "tid": 2379450, "ts": 6345938542037.622, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.10)", "pid": 2338710, "tid": 2379450, + "ts": 6345938542113.754, "dur": 47.868, + "args": { + "External id": 981545,"Record function id": 0, "Ev Idx": 4136 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.10)", "pid": 2338710, "tid": 2379450, + "ts": 6345938542171.711, "dur": 78.832, + "args": { + "External id": 981546,"Record function id": 0, "Ev Idx": 4137 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.10)", "pid": 2338710, "tid": 2379450, + "ts": 6345938542262.437, "dur": 64632.290, + "args": { + "External id": 981547,"Record function id": 0, "Ev Idx": 4138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938542374.605, "dur": 12.603, + "args": { + "External id": 981548,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938542399.791, "dur": 8.480, + "args": { + "External id": 981549,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345938542424.822, "dur": 63411.268, + "args": { + "External id": 981550,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345938542441.376, "dur": 63377.844, + "args": { + "External id": 981551,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938542555.417, "dur": 25.171, + "args": { + "External id": 981552,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345938542607.407, "dur": 63153.262, + "args": { + "External id": 981553,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 4144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938542613.803, "dur": 63145.501, + "args": { + "External id": 981554,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 4145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938542619.267, "dur": 10.535, + "args": { + "External id": 981555,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938542632.298, "dur": 63120.311, + "args": { + "External id": 981556,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 4147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938605984.152, "dur": 18.129, + "args": { + "External id": 981557,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 4148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938605989.767, "dur": 12.069, + "args": { + "External id": 981558,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345938606093.703, "dur": 443.447, + "args": { + "External id": 981559,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 4150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938606137.501, "dur": 393.629, + "args": { + "External id": 981560,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4151, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345938606154.909, "dur": 368.907, + "args": { + "External id": 981561,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 4152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938606564.239, "dur": 2.441, + "args": { + "External id": 981562,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4153, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938606643.789, "dur": 9.061, + "args": { + "External id": 981563,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938606710.555, "dur": 2.272, + "args": { + "External id": 981564,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938606730.990, "dur": 5.096, + "args": { + "External id": 981565,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938606749.312, "dur": 1.234, + "args": { + "External id": 981566,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938606766.128, "dur": 1.192, + "args": { + "External id": 981567,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938606779.499, "dur": 1.402, + "args": { + "External id": 981568,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938606791.761, "dur": 3.331, + "args": { + "External id": 981569,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938606806.292, "dur": 2.298, + "args": { + "External id": 981570,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938606819.574, "dur": 1.357, + "args": { + "External id": 981571,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938606934.334, "dur": 3495.118, + "args": { + "External id": 981572,"Record function id": 0, "Ev Idx": 4163 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.9)", "pid": 2338710, "tid": 2379450, + "ts": 6345938606960.349, "dur": 1355.424, + "args": { + "External id": 981573,"Record function id": 0, "Ev Idx": 4164 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.9)", "pid": 2338710, "tid": 2379450, + "ts": 6345938606980.657, "dur": 470.263, + "args": { + "External id": 981574,"Record function id": 0, "Ev Idx": 4165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938607151.542, "dur": 7.398, + "args": { + "External id": 981575,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938607163.311, "dur": 1.318, + "args": { + "External id": 981576,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938607166.653, "dur": 2.846, + "args": { + "External id": 981577,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938607171.665, "dur": 0.951, + "args": { + "External id": 981578,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938607174.451, "dur": 1.173, + "args": { + "External id": 981579,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938607177.363, "dur": 0.790, + "args": { + "External id": 981580,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938607180.038, "dur": 2.335, + "args": { + "External id": 981581,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938607186.564, "dur": 0.883, + "args": { + "External id": 981582,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938607192.990, "dur": 0.999, + "args": { + "External id": 981583,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938607195.833, "dur": 0.949, + "args": { + "External id": 981584,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938607222.791, "dur": 189.858, + "args": { + "External id": 981585,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938607243.254, "dur": 164.092, + "args": { + "External id": 981586,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938607261.752, "dur": 21.342, + "args": { + "External id": 981587,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345938607290.680, "dur": 83.038, + "args": { + "External id": 981588,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 4179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938607293.883, "dur": 79.498, + "args": { + "External id": 981589,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 4180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938607298.755, "dur": 6.835, + "args": { + "External id": 981590,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938607307.479, "dur": 65.138, + "args": { + "External id": 981591,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 4182 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.8", "pid": 2338710, "tid": 2379450, + "ts": 6345938607541.061, "dur": 766.029, + "args": { + "External id": 981592,"Record function id": 0, "Ev Idx": 4183 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.8)", "pid": 2338710, "tid": 2379450, + "ts": 6345938607560.820, "dur": 732.225, + "args": { + "External id": 981593,"Record function id": 0, "Ev Idx": 4184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938607627.424, "dur": 7.145, + "args": { + "External id": 981594,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345938607651.988, "dur": 38.120, + "args": { + "External id": 981595,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938607658.013, "dur": 2.068, + "args": { + "External id": 981596,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938607663.249, "dur": 1.697, + "args": { + "External id": 981597,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938607666.377, "dur": 0.647, + "args": { + "External id": 981598,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938607668.770, "dur": 0.612, + "args": { + "External id": 981599,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938607672.548, "dur": 0.554, + "args": { + "External id": 981600,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938607674.833, "dur": 2.690, + "args": { + "External id": 981601,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938607678.938, "dur": 0.588, + "args": { + "External id": 981602,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938607682.628, "dur": 0.339, + "args": { + "External id": 981603,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938607684.697, "dur": 0.602, + "args": { + "External id": 981604,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938607701.381, "dur": 53.039, + "args": { + "External id": 981605,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345938607790.144, "dur": 134.838, + "args": { + "External id": 981606,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 4197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938607804.834, "dur": 4.249, + "args": { + "External id": 981607,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345938607814.989, "dur": 12.075, + "args": { + "External id": 981608,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345938607819.937, "dur": 6.641, + "args": { + "External id": 981609,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 4200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938607824.267, "dur": 0.788, + "args": { + "External id": 981610,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345938607835.251, "dur": 34.525, + "args": { + "External id": 981611,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938607837.844, "dur": 0.531, + "args": { + "External id": 981612,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938607841.060, "dur": 0.569, + "args": { + "External id": 981613,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938607843.222, "dur": 3.144, + "args": { + "External id": 981614,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938607848.453, "dur": 1.496, + "args": { + "External id": 981615,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938607851.744, "dur": 0.883, + "args": { + "External id": 981616,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938607854.435, "dur": 0.352, + "args": { + "External id": 981617,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938607857.002, "dur": 0.356, + "args": { + "External id": 981618,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938607859.036, "dur": 0.700, + "args": { + "External id": 981619,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938607861.504, "dur": 0.517, + "args": { + "External id": 981620,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938607882.432, "dur": 34.297, + "args": { + "External id": 981621,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345938607975.582, "dur": 220.700, + "args": { + "External id": 981622,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 4213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938608030.941, "dur": 160.235, + "args": { + "External id": 981623,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4214, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345938608049.297, "dur": 136.932, + "args": { + "External id": 981624,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 4215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938608221.958, "dur": 2.260, + "args": { + "External id": 981625,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4216, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938608324.538, "dur": 2080.824, + "args": { + "External id": 981626,"Sequence number": 10552262, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4217 + } + }, + { + "ph": "f", "id": 204, "pid": 2338710, "tid": 2379450, "ts": 6345938608324.538, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938608457.116, "dur": 128.701, + "args": { + "External id": 981627,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 4218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345938608633.923, "dur": 45.145, + "args": { + "External id": 981628,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 4219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345938608697.490, "dur": 59.070, + "args": { + "External id": 981629,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 4220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938608770.296, "dur": 35.489, + "args": { + "External id": 981630,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938608813.612, "dur": 35.380, + "args": { + "External id": 981631,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938608856.399, "dur": 30.566, + "args": { + "External id": 981632,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938608894.945, "dur": 31.643, + "args": { + "External id": 981633,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345938608957.926, "dur": 25.512, + "args": { + "External id": 981634,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 4225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345938609004.559, "dur": 99.788, + "args": { + "External id": 981635,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345938609138.499, "dur": 24.287, + "args": { + "External id": 981636,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345938609179.908, "dur": 16.825, + "args": { + "External id": 981637,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938609205.970, "dur": 50.683, + "args": { + "External id": 981638,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938609260.733, "dur": 36.916, + "args": { + "External id": 981639,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345938609332.412, "dur": 323.055, + "args": { + "External id": 981640,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938609422.949, "dur": 6.866, + "args": { + "External id": 981641,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938609432.229, "dur": 3.195, + "args": { + "External id": 981642,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938609437.089, "dur": 3.464, + "args": { + "External id": 981643,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938609441.966, "dur": 2.729, + "args": { + "External id": 981644,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345938609518.223, "dur": 12.290, + "args": { + "External id": 981645,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938609520.819, "dur": 8.910, + "args": { + "External id": 981646,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345938609535.420, "dur": 40.122, + "args": { + "External id": 981647,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938609542.559, "dur": 4.008, + "args": { + "External id": 981648,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345938609577.265, "dur": 1.604, + "args": { + "External id": 981649,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938609578.074, "dur": 0.655, + "args": { + "External id": 981650,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345938609580.265, "dur": 17.383, + "args": { + "External id": 981651,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938609584.035, "dur": 0.670, + "args": { + "External id": 981652,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345938609700.566, "dur": 38.517, + "args": { + "External id": 981653,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345938609762.112, "dur": 17.623, + "args": { + "External id": 981654,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938609788.818, "dur": 52.131, + "args": { + "External id": 981655,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938609848.046, "dur": 45.397, + "args": { + "External id": 981656,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938609904.463, "dur": 25.920, + "args": { + "External id": 981657,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938609937.218, "dur": 35.037, + "args": { + "External id": 981658,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938609979.773, "dur": 54.225, + "args": { + "External id": 981659,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938610048.801, "dur": 81.342, + "args": { + "External id": 981660,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345938610161.241, "dur": 31.586, + "args": { + "External id": 981661,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 4252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345938610212.973, "dur": 28.692, + "args": { + "External id": 981662,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345938610263.380, "dur": 21.721, + "args": { + "External id": 981663,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345938610314.055, "dur": 16.274, + "args": { + "External id": 981664,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345938610353.374, "dur": 17.493, + "args": { + "External id": 981665,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 4256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938610455.774, "dur": 18.766, + "args": { + "External id": 981666,"Record function id": 0, "Ev Idx": 4257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938610459.232, "dur": 14.318, + "args": { + "External id": 981667,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938610464.499, "dur": 8.095, + "args": { + "External id": 981668,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938610466.290, "dur": 6.185, + "args": { + "External id": 981669,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938610479.079, "dur": 6.518, + "args": { + "External id": 981670,"Record function id": 0, "Ev Idx": 4261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938610481.212, "dur": 3.755, + "args": { + "External id": 981671,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938610482.494, "dur": 1.982, + "args": { + "External id": 981672,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938610483.229, "dur": 1.135, + "args": { + "External id": 981673,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938610489.434, "dur": 4.848, + "args": { + "External id": 981674,"Record function id": 0, "Ev Idx": 4265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938610491.251, "dur": 2.509, + "args": { + "External id": 981675,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938610491.826, "dur": 1.419, + "args": { + "External id": 981676,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938610492.450, "dur": 0.693, + "args": { + "External id": 981677,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938610498.114, "dur": 6.743, + "args": { + "External id": 981678,"Record function id": 0, "Ev Idx": 4269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938610499.714, "dur": 4.635, + "args": { + "External id": 981679,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938610500.308, "dur": 3.575, + "args": { + "External id": 981680,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938610500.687, "dur": 3.114, + "args": { + "External id": 981681,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938610508.497, "dur": 4.553, + "args": { + "External id": 981682,"Record function id": 0, "Ev Idx": 4273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938610509.823, "dur": 2.722, + "args": { + "External id": 981683,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938610510.553, "dur": 1.550, + "args": { + "External id": 981684,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938610510.961, "dur": 1.059, + "args": { + "External id": 981685,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938610516.890, "dur": 5.503, + "args": { + "External id": 981686,"Record function id": 0, "Ev Idx": 4277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938610518.722, "dur": 3.195, + "args": { + "External id": 981687,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938610519.598, "dur": 1.867, + "args": { + "External id": 981688,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938610520.539, "dur": 0.816, + "args": { + "External id": 981689,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938610526.137, "dur": 4.998, + "args": { + "External id": 981690,"Record function id": 0, "Ev Idx": 4281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938610527.463, "dur": 3.209, + "args": { + "External id": 981691,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938610528.503, "dur": 1.722, + "args": { + "External id": 981692,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938610529.258, "dur": 0.888, + "args": { + "External id": 981693,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938610534.716, "dur": 5.261, + "args": { + "External id": 981694,"Record function id": 0, "Ev Idx": 4285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938610536.360, "dur": 3.119, + "args": { + "External id": 981695,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938610537.194, "dur": 1.815, + "args": { + "External id": 981696,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938610538.004, "dur": 0.925, + "args": { + "External id": 981697,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938610543.597, "dur": 6.908, + "args": { + "External id": 981698,"Record function id": 0, "Ev Idx": 4289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938610544.949, "dur": 5.050, + "args": { + "External id": 981699,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938610545.854, "dur": 3.688, + "args": { + "External id": 981700,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938610548.698, "dur": 0.767, + "args": { + "External id": 981701,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938610555.207, "dur": 64995.088, + "args": { + "External id": 981702,"Record function id": 0, "Sequence number": 10552261, "Fwd thread id": 1, "Ev Idx": 4293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938610557.245, "dur": 64982.460, + "args": { + "External id": 981703,"Sequence number": 10552261, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4294 + } + }, + { + "ph": "f", "id": 205, "pid": 2338710, "tid": 2379450, "ts": 6345938610557.245, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.9)", "pid": 2338710, "tid": 2379450, + "ts": 6345938610592.474, "dur": 43.906, + "args": { + "External id": 981704,"Record function id": 0, "Ev Idx": 4295 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.9)", "pid": 2338710, "tid": 2379450, + "ts": 6345938610646.031, "dur": 69.707, + "args": { + "External id": 981705,"Record function id": 0, "Ev Idx": 4296 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.9)", "pid": 2338710, "tid": 2379450, + "ts": 6345938610725.542, "dur": 64804.688, + "args": { + "External id": 981706,"Record function id": 0, "Ev Idx": 4297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938610830.484, "dur": 8.396, + "args": { + "External id": 981707,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938610850.048, "dur": 5.567, + "args": { + "External id": 981708,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345938610872.542, "dur": 63573.587, + "args": { + "External id": 981709,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345938610889.192, "dur": 63539.909, + "args": { + "External id": 981710,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938610995.418, "dur": 47.807, + "args": { + "External id": 981711,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345938611108.130, "dur": 63267.335, + "args": { + "External id": 981712,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 4303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938611111.623, "dur": 63262.476, + "args": { + "External id": 981713,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 4304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938611117.801, "dur": 16.297, + "args": { + "External id": 981714,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938611139.750, "dur": 63228.179, + "args": { + "External id": 981715,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 4306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938674575.041, "dur": 15.034, + "args": { + "External id": 981716,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 4307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938674579.795, "dur": 9.717, + "args": { + "External id": 981717,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345938674628.153, "dur": 543.353, + "args": { + "External id": 981718,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 4309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938674670.474, "dur": 492.857, + "args": { + "External id": 981719,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4310, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345938674686.523, "dur": 468.244, + "args": { + "External id": 981720,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 4311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938675203.375, "dur": 2.901, + "args": { + "External id": 981721,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4312, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938675288.736, "dur": 9.567, + "args": { + "External id": 981722,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938675354.344, "dur": 2.552, + "args": { + "External id": 981723,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938675374.990, "dur": 1.761, + "args": { + "External id": 981724,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938675391.835, "dur": 0.932, + "args": { + "External id": 981725,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938675404.049, "dur": 1.108, + "args": { + "External id": 981726,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938675417.316, "dur": 1.004, + "args": { + "External id": 981727,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938675432.575, "dur": 1.506, + "args": { + "External id": 981728,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938675447.096, "dur": 2.504, + "args": { + "External id": 981729,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938675460.825, "dur": 1.015, + "args": { + "External id": 981730,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938675567.993, "dur": 3361.712, + "args": { + "External id": 981731,"Record function id": 0, "Ev Idx": 4322 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.8)", "pid": 2338710, "tid": 2379450, + "ts": 6345938675590.057, "dur": 1268.473, + "args": { + "External id": 981732,"Record function id": 0, "Ev Idx": 4323 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.8)", "pid": 2338710, "tid": 2379450, + "ts": 6345938675607.352, "dur": 373.821, + "args": { + "External id": 981733,"Record function id": 0, "Ev Idx": 4324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938675696.076, "dur": 5.106, + "args": { + "External id": 981734,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938675705.088, "dur": 1.234, + "args": { + "External id": 981735,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938675708.617, "dur": 0.897, + "args": { + "External id": 981736,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938675711.767, "dur": 3.723, + "args": { + "External id": 981737,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938675719.603, "dur": 0.991, + "args": { + "External id": 981738,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938675722.462, "dur": 0.659, + "args": { + "External id": 981739,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938675724.944, "dur": 2.302, + "args": { + "External id": 981740,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938675729.129, "dur": 1.127, + "args": { + "External id": 981741,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938675734.025, "dur": 1.077, + "args": { + "External id": 981742,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938675736.821, "dur": 0.954, + "args": { + "External id": 981743,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938675760.062, "dur": 186.917, + "args": { + "External id": 981744,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938675780.413, "dur": 161.483, + "args": { + "External id": 981745,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938675801.729, "dur": 18.665, + "args": { + "External id": 981746,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345938675825.879, "dur": 86.338, + "args": { + "External id": 981747,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 4338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938675829.070, "dur": 82.783, + "args": { + "External id": 981748,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 4339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938675834.042, "dur": 9.551, + "args": { + "External id": 981749,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938675847.971, "dur": 63.132, + "args": { + "External id": 981750,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 4341 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.7", "pid": 2338710, "tid": 2379450, + "ts": 6345938676146.624, "dur": 702.891, + "args": { + "External id": 981751,"Record function id": 0, "Ev Idx": 4342 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.7)", "pid": 2338710, "tid": 2379450, + "ts": 6345938676169.255, "dur": 666.149, + "args": { + "External id": 981752,"Record function id": 0, "Ev Idx": 4343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938676238.531, "dur": 8.993, + "args": { + "External id": 981753,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345938676265.924, "dur": 40.751, + "args": { + "External id": 981754,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938676272.517, "dur": 3.358, + "args": { + "External id": 981755,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938676278.404, "dur": 0.581, + "args": { + "External id": 981756,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938676280.810, "dur": 0.766, + "args": { + "External id": 981757,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938676284.640, "dur": 0.678, + "args": { + "External id": 981758,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938676287.309, "dur": 0.557, + "args": { + "External id": 981759,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938676289.422, "dur": 0.576, + "args": { + "External id": 981760,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938676292.693, "dur": 2.687, + "args": { + "External id": 981761,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938676296.891, "dur": 0.426, + "args": { + "External id": 981762,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938676299.156, "dur": 1.608, + "args": { + "External id": 981763,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938676323.311, "dur": 53.538, + "args": { + "External id": 981764,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345938676419.800, "dur": 137.769, + "args": { + "External id": 981765,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 4356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938676431.683, "dur": 4.743, + "args": { + "External id": 981766,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345938676442.625, "dur": 12.084, + "args": { + "External id": 981767,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345938676447.754, "dur": 6.444, + "args": { + "External id": 981768,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 4359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938676451.995, "dur": 0.703, + "args": { + "External id": 981769,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345938676462.824, "dur": 37.547, + "args": { + "External id": 981770,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938676465.891, "dur": 0.673, + "args": { + "External id": 981771,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938676468.713, "dur": 0.617, + "args": { + "External id": 981772,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938676475.215, "dur": 1.965, + "args": { + "External id": 981773,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938676478.872, "dur": 2.818, + "args": { + "External id": 981774,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938676483.477, "dur": 0.681, + "args": { + "External id": 981775,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938676487.692, "dur": 0.544, + "args": { + "External id": 981776,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938676489.850, "dur": 0.458, + "args": { + "External id": 981777,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938676491.728, "dur": 0.466, + "args": { + "External id": 981778,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938676495.563, "dur": 0.555, + "args": { + "External id": 981779,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938676515.139, "dur": 34.033, + "args": { + "External id": 981780,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345938676609.756, "dur": 145.184, + "args": { + "External id": 981781,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 4372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938676647.175, "dur": 103.487, + "args": { + "External id": 981782,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4373, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345938676657.826, "dur": 87.925, + "args": { + "External id": 981783,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 4374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938676774.951, "dur": 2.055, + "args": { + "External id": 981784,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4375, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938676866.851, "dur": 2038.579, + "args": { + "External id": 981785,"Sequence number": 10552260, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4376 + } + }, + { + "ph": "f", "id": 206, "pid": 2338710, "tid": 2379450, "ts": 6345938676866.851, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938676986.673, "dur": 193.424, + "args": { + "External id": 981786,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 4377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345938677248.807, "dur": 45.397, + "args": { + "External id": 981787,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 4378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345938677313.964, "dur": 62.552, + "args": { + "External id": 981788,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 4379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938677389.288, "dur": 36.145, + "args": { + "External id": 981789,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938677434.961, "dur": 39.851, + "args": { + "External id": 981790,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938677481.993, "dur": 32.779, + "args": { + "External id": 981791,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938677524.678, "dur": 32.265, + "args": { + "External id": 981792,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345938677586.997, "dur": 25.506, + "args": { + "External id": 981793,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 4384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345938677637.120, "dur": 33.874, + "args": { + "External id": 981794,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345938677695.808, "dur": 22.103, + "args": { + "External id": 981795,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345938677733.279, "dur": 18.170, + "args": { + "External id": 981796,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938677758.941, "dur": 41.945, + "args": { + "External id": 981797,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938677804.367, "dur": 34.959, + "args": { + "External id": 981798,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345938677872.771, "dur": 384.610, + "args": { + "External id": 981799,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938677961.194, "dur": 11.065, + "args": { + "External id": 981800,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938677974.794, "dur": 3.262, + "args": { + "External id": 981801,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938677979.793, "dur": 2.307, + "args": { + "External id": 981802,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938677983.449, "dur": 4.307, + "args": { + "External id": 981803,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345938678096.472, "dur": 17.516, + "args": { + "External id": 981804,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938678099.681, "dur": 11.276, + "args": { + "External id": 981805,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345938678119.075, "dur": 47.402, + "args": { + "External id": 981806,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938678127.517, "dur": 2.505, + "args": { + "External id": 981807,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345938678168.561, "dur": 2.090, + "args": { + "External id": 981808,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938678169.871, "dur": 0.639, + "args": { + "External id": 981809,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345938678171.799, "dur": 19.184, + "args": { + "External id": 981810,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938678174.630, "dur": 3.063, + "args": { + "External id": 981811,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345938678305.515, "dur": 31.322, + "args": { + "External id": 981812,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345938678358.850, "dur": 17.424, + "args": { + "External id": 981813,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938678385.159, "dur": 57.933, + "args": { + "External id": 981814,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938678449.871, "dur": 44.840, + "args": { + "External id": 981815,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938678505.957, "dur": 26.055, + "args": { + "External id": 981816,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938678538.007, "dur": 34.779, + "args": { + "External id": 981817,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938678580.641, "dur": 32.404, + "args": { + "External id": 981818,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938678619.557, "dur": 33.221, + "args": { + "External id": 981819,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345938678680.901, "dur": 27.443, + "args": { + "External id": 981820,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 4411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345938678726.305, "dur": 32.568, + "args": { + "External id": 981821,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345938678778.227, "dur": 20.326, + "args": { + "External id": 981822,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345938678821.114, "dur": 17.393, + "args": { + "External id": 981823,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345938678853.179, "dur": 19.498, + "args": { + "External id": 981824,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 4415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938678956.363, "dur": 16.700, + "args": { + "External id": 981825,"Record function id": 0, "Ev Idx": 4416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938678959.901, "dur": 12.005, + "args": { + "External id": 981826,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938678964.931, "dur": 5.990, + "args": { + "External id": 981827,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938678966.535, "dur": 4.290, + "args": { + "External id": 981828,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938678977.670, "dur": 5.567, + "args": { + "External id": 981829,"Record function id": 0, "Ev Idx": 4420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938678979.448, "dur": 3.158, + "args": { + "External id": 981830,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938678980.420, "dur": 1.632, + "args": { + "External id": 981831,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938678980.999, "dur": 0.891, + "args": { + "External id": 981832,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938678986.958, "dur": 7.292, + "args": { + "External id": 981833,"Record function id": 0, "Ev Idx": 4424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938678988.789, "dur": 4.992, + "args": { + "External id": 981834,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938678989.407, "dur": 3.886, + "args": { + "External id": 981835,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938678989.946, "dur": 3.252, + "args": { + "External id": 981836,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938678998.085, "dur": 5.079, + "args": { + "External id": 981837,"Record function id": 0, "Ev Idx": 4428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938678999.686, "dur": 2.967, + "args": { + "External id": 981838,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938679000.268, "dur": 1.926, + "args": { + "External id": 981839,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938679000.926, "dur": 1.185, + "args": { + "External id": 981840,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938679006.835, "dur": 28.798, + "args": { + "External id": 981841,"Record function id": 0, "Ev Idx": 4432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938679029.364, "dur": 5.257, + "args": { + "External id": 981842,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938679030.927, "dur": 2.726, + "args": { + "External id": 981843,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938679031.750, "dur": 1.597, + "args": { + "External id": 981844,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938679041.113, "dur": 5.259, + "args": { + "External id": 981845,"Record function id": 0, "Ev Idx": 4436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938679042.937, "dur": 2.973, + "args": { + "External id": 981846,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938679043.815, "dur": 1.628, + "args": { + "External id": 981847,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938679044.494, "dur": 0.861, + "args": { + "External id": 981848,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938679050.581, "dur": 49.563, + "args": { + "External id": 981849,"Record function id": 0, "Ev Idx": 4440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938679090.334, "dur": 8.611, + "args": { + "External id": 981850,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938679092.366, "dur": 5.258, + "args": { + "External id": 981851,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938679095.781, "dur": 1.544, + "args": { + "External id": 981852,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938679106.540, "dur": 5.305, + "args": { + "External id": 981853,"Record function id": 0, "Ev Idx": 4444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938679108.473, "dur": 2.862, + "args": { + "External id": 981854,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938679109.325, "dur": 1.536, + "args": { + "External id": 981855,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938679109.960, "dur": 0.779, + "args": { + "External id": 981856,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938679115.806, "dur": 4.395, + "args": { + "External id": 981857,"Record function id": 0, "Ev Idx": 4448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938679117.096, "dur": 2.626, + "args": { + "External id": 981858,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938679117.887, "dur": 1.372, + "args": { + "External id": 981859,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938679118.226, "dur": 0.900, + "args": { + "External id": 981860,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938679125.423, "dur": 62392.353, + "args": { + "External id": 981861,"Record function id": 0, "Sequence number": 10552259, "Fwd thread id": 1, "Ev Idx": 4452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938679127.373, "dur": 62379.670, + "args": { + "External id": 981862,"Sequence number": 10552259, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4453 + } + }, + { + "ph": "f", "id": 207, "pid": 2338710, "tid": 2379450, "ts": 6345938679127.373, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.8)", "pid": 2338710, "tid": 2379450, + "ts": 6345938679167.926, "dur": 42.322, + "args": { + "External id": 981863,"Record function id": 0, "Ev Idx": 4454 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.8)", "pid": 2338710, "tid": 2379450, + "ts": 6345938679219.672, "dur": 71.951, + "args": { + "External id": 981864,"Record function id": 0, "Ev Idx": 4455 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.8)", "pid": 2338710, "tid": 2379450, + "ts": 6345938679298.360, "dur": 62198.793, + "args": { + "External id": 981865,"Record function id": 0, "Ev Idx": 4456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938679406.948, "dur": 9.152, + "args": { + "External id": 981866,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938679427.792, "dur": 11.281, + "args": { + "External id": 981867,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345938679459.927, "dur": 60997.535, + "args": { + "External id": 981868,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345938679476.746, "dur": 60963.617, + "args": { + "External id": 981869,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938679581.513, "dur": 22.632, + "args": { + "External id": 981870,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345938679628.813, "dur": 60751.275, + "args": { + "External id": 981871,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 4462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938679633.121, "dur": 60745.596, + "args": { + "External id": 981872,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 4463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938679640.383, "dur": 10.136, + "args": { + "External id": 981873,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938679653.001, "dur": 60718.716, + "args": { + "External id": 981874,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 4465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938740596.982, "dur": 16.572, + "args": { + "External id": 981875,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 4466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938740601.950, "dur": 11.117, + "args": { + "External id": 981876,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345938740654.500, "dur": 497.246, + "args": { + "External id": 981877,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 4468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938740698.155, "dur": 446.974, + "args": { + "External id": 981878,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4469, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345938740716.327, "dur": 421.228, + "args": { + "External id": 981879,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 4470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938741181.816, "dur": 2.746, + "args": { + "External id": 981880,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4471, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938741259.696, "dur": 8.158, + "args": { + "External id": 981881,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938741321.636, "dur": 1.335, + "args": { + "External id": 981882,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938741340.130, "dur": 4.422, + "args": { + "External id": 981883,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938741357.077, "dur": 1.121, + "args": { + "External id": 981884,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938741372.554, "dur": 0.930, + "args": { + "External id": 981885,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938741384.780, "dur": 1.136, + "args": { + "External id": 981886,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938741396.663, "dur": 3.254, + "args": { + "External id": 981887,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938741411.253, "dur": 2.497, + "args": { + "External id": 981888,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938741425.041, "dur": 0.995, + "args": { + "External id": 981889,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938741534.834, "dur": 3392.967, + "args": { + "External id": 981890,"Record function id": 0, "Ev Idx": 4481 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.7)", "pid": 2338710, "tid": 2379450, + "ts": 6345938741560.222, "dur": 1251.402, + "args": { + "External id": 981891,"Record function id": 0, "Ev Idx": 4482 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.7)", "pid": 2338710, "tid": 2379450, + "ts": 6345938741577.775, "dur": 372.758, + "args": { + "External id": 981892,"Record function id": 0, "Ev Idx": 4483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938741666.102, "dur": 5.083, + "args": { + "External id": 981893,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938741674.978, "dur": 0.790, + "args": { + "External id": 981894,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938741677.812, "dur": 3.411, + "args": { + "External id": 981895,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938741683.457, "dur": 0.863, + "args": { + "External id": 981896,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938741686.141, "dur": 0.685, + "args": { + "External id": 981897,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938741688.394, "dur": 0.946, + "args": { + "External id": 981898,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938741691.240, "dur": 2.107, + "args": { + "External id": 981899,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938741697.570, "dur": 0.715, + "args": { + "External id": 981900,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938741700.074, "dur": 0.897, + "args": { + "External id": 981901,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938741702.536, "dur": 0.620, + "args": { + "External id": 981902,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938741723.867, "dur": 190.931, + "args": { + "External id": 981903,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938741743.877, "dur": 164.386, + "args": { + "External id": 981904,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938741764.508, "dur": 18.323, + "args": { + "External id": 981905,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345938741789.065, "dur": 85.023, + "args": { + "External id": 981906,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 4497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938741791.981, "dur": 81.650, + "args": { + "External id": 981907,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 4498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938741797.330, "dur": 5.809, + "args": { + "External id": 981908,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938741806.212, "dur": 66.546, + "args": { + "External id": 981909,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 4500 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.6", "pid": 2338710, "tid": 2379450, + "ts": 6345938742103.492, "dur": 699.562, + "args": { + "External id": 981910,"Record function id": 0, "Ev Idx": 4501 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.6)", "pid": 2338710, "tid": 2379450, + "ts": 6345938742126.163, "dur": 663.053, + "args": { + "External id": 981911,"Record function id": 0, "Ev Idx": 4502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938742197.759, "dur": 9.730, + "args": { + "External id": 981912,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345938742227.589, "dur": 41.496, + "args": { + "External id": 981913,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938742234.651, "dur": 4.114, + "args": { + "External id": 981914,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938742241.359, "dur": 0.557, + "args": { + "External id": 981915,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938742243.292, "dur": 0.556, + "args": { + "External id": 981916,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938742247.292, "dur": 0.457, + "args": { + "External id": 981917,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938742249.399, "dur": 0.678, + "args": { + "External id": 981918,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938742251.707, "dur": 2.935, + "args": { + "External id": 981919,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938742257.362, "dur": 0.420, + "args": { + "External id": 981920,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938742259.769, "dur": 0.342, + "args": { + "External id": 981921,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938742261.616, "dur": 1.874, + "args": { + "External id": 981922,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938742282.553, "dur": 55.213, + "args": { + "External id": 981923,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345938742378.378, "dur": 133.705, + "args": { + "External id": 981924,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 4515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938742391.798, "dur": 4.480, + "args": { + "External id": 981925,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345938742402.846, "dur": 13.216, + "args": { + "External id": 981926,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345938742408.966, "dur": 6.620, + "args": { + "External id": 981927,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 4518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938742413.439, "dur": 0.814, + "args": { + "External id": 981928,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345938742424.538, "dur": 31.800, + "args": { + "External id": 981929,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938742427.090, "dur": 0.502, + "args": { + "External id": 981930,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938742429.807, "dur": 0.403, + "args": { + "External id": 981931,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938742431.966, "dur": 3.464, + "args": { + "External id": 981932,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938742437.388, "dur": 0.664, + "args": { + "External id": 981933,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938742439.497, "dur": 0.443, + "args": { + "External id": 981934,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938742442.972, "dur": 0.546, + "args": { + "External id": 981935,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938742445.253, "dur": 0.520, + "args": { + "External id": 981936,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938742447.302, "dur": 0.438, + "args": { + "External id": 981937,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938742451.581, "dur": 0.379, + "args": { + "External id": 981938,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938742468.361, "dur": 34.389, + "args": { + "External id": 981939,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345938742562.720, "dur": 144.980, + "args": { + "External id": 981940,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 4531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938742597.697, "dur": 106.145, + "args": { + "External id": 981941,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4532, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345938742608.699, "dur": 90.019, + "args": { + "External id": 981942,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 4533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938742728.980, "dur": 2.133, + "args": { + "External id": 981943,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4534, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938742820.333, "dur": 2083.930, + "args": { + "External id": 981944,"Sequence number": 10552258, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4535 + } + }, + { + "ph": "f", "id": 208, "pid": 2338710, "tid": 2379450, "ts": 6345938742820.333, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938742946.886, "dur": 190.943, + "args": { + "External id": 981945,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 4536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345938743196.653, "dur": 47.857, + "args": { + "External id": 981946,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 4537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345938743281.222, "dur": 72.548, + "args": { + "External id": 981947,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 4538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938743368.782, "dur": 39.321, + "args": { + "External id": 981948,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938743415.574, "dur": 37.205, + "args": { + "External id": 981949,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938743460.093, "dur": 31.518, + "args": { + "External id": 981950,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938743497.987, "dur": 31.708, + "args": { + "External id": 981951,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345938743563.700, "dur": 27.812, + "args": { + "External id": 981952,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 4543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345938743614.263, "dur": 30.590, + "args": { + "External id": 981953,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345938743670.010, "dur": 23.767, + "args": { + "External id": 981954,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345938743710.438, "dur": 17.561, + "args": { + "External id": 981955,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938743737.000, "dur": 40.771, + "args": { + "External id": 981956,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938743781.581, "dur": 35.080, + "args": { + "External id": 981957,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345938743851.547, "dur": 397.397, + "args": { + "External id": 981958,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938743941.502, "dur": 7.803, + "args": { + "External id": 981959,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938743952.011, "dur": 3.700, + "args": { + "External id": 981960,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938743957.258, "dur": 2.189, + "args": { + "External id": 981961,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938743960.876, "dur": 3.811, + "args": { + "External id": 981962,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345938744088.385, "dur": 16.067, + "args": { + "External id": 981963,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938744098.527, "dur": 4.661, + "args": { + "External id": 981964,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345938744106.855, "dur": 49.132, + "args": { + "External id": 981965,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938744114.758, "dur": 4.341, + "args": { + "External id": 981966,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345938744158.173, "dur": 1.828, + "args": { + "External id": 981967,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938744159.315, "dur": 0.555, + "args": { + "External id": 981968,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345938744161.109, "dur": 18.410, + "args": { + "External id": 981969,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938744165.365, "dur": 0.683, + "args": { + "External id": 981970,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345938744296.345, "dur": 34.924, + "args": { + "External id": 981971,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345938744354.539, "dur": 18.166, + "args": { + "External id": 981972,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938744381.592, "dur": 60.700, + "args": { + "External id": 981973,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938744449.532, "dur": 49.187, + "args": { + "External id": 981974,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938744509.847, "dur": 25.142, + "args": { + "External id": 981975,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938744544.704, "dur": 34.846, + "args": { + "External id": 981976,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938744587.138, "dur": 31.200, + "args": { + "External id": 981977,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938744627.242, "dur": 35.863, + "args": { + "External id": 981978,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345938744685.886, "dur": 30.100, + "args": { + "External id": 981979,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 4570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345938744735.562, "dur": 25.180, + "args": { + "External id": 981980,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345938744779.276, "dur": 19.051, + "args": { + "External id": 981981,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345938744820.514, "dur": 16.382, + "args": { + "External id": 981982,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345938744851.329, "dur": 17.871, + "args": { + "External id": 981983,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 4574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938744954.691, "dur": 16.915, + "args": { + "External id": 981984,"Record function id": 0, "Ev Idx": 4575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938744958.773, "dur": 11.761, + "args": { + "External id": 981985,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938744963.454, "dur": 6.025, + "args": { + "External id": 981986,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938744965.100, "dur": 4.274, + "args": { + "External id": 981987,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938744976.046, "dur": 5.134, + "args": { + "External id": 981988,"Record function id": 0, "Ev Idx": 4579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938744977.775, "dur": 2.906, + "args": { + "External id": 981989,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938744978.608, "dur": 1.569, + "args": { + "External id": 981990,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938744979.254, "dur": 0.802, + "args": { + "External id": 981991,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938744985.274, "dur": 7.699, + "args": { + "External id": 981992,"Record function id": 0, "Ev Idx": 4583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938744986.793, "dur": 5.656, + "args": { + "External id": 981993,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938744987.407, "dur": 4.522, + "args": { + "External id": 981994,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938744988.243, "dur": 3.563, + "args": { + "External id": 981995,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938744996.839, "dur": 5.140, + "args": { + "External id": 981996,"Record function id": 0, "Ev Idx": 4587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938744998.690, "dur": 2.794, + "args": { + "External id": 981997,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938744999.580, "dur": 1.435, + "args": { + "External id": 981998,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938745000.040, "dur": 0.898, + "args": { + "External id": 981999,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938745005.692, "dur": 27.357, + "args": { + "External id": 982000,"Record function id": 0, "Ev Idx": 4591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938745007.160, "dur": 24.632, + "args": { + "External id": 982001,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938745028.234, "dur": 2.600, + "args": { + "External id": 982002,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938745028.854, "dur": 1.667, + "args": { + "External id": 982003,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938745038.968, "dur": 5.268, + "args": { + "External id": 982004,"Record function id": 0, "Ev Idx": 4595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938745040.611, "dur": 3.052, + "args": { + "External id": 982005,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938745041.653, "dur": 1.513, + "args": { + "External id": 982006,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938745042.373, "dur": 0.701, + "args": { + "External id": 982007,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938745047.973, "dur": 6.866, + "args": { + "External id": 982008,"Record function id": 0, "Ev Idx": 4599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938745049.519, "dur": 4.831, + "args": { + "External id": 982009,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938745050.074, "dur": 3.790, + "args": { + "External id": 982010,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938745052.933, "dur": 0.762, + "args": { + "External id": 982011,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938745099.115, "dur": 7.569, + "args": { + "External id": 982012,"Record function id": 0, "Ev Idx": 4603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938745101.196, "dur": 4.781, + "args": { + "External id": 982013,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938745102.562, "dur": 2.419, + "args": { + "External id": 982014,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938745103.280, "dur": 1.519, + "args": { + "External id": 982015,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938745110.594, "dur": 34.881, + "args": { + "External id": 982016,"Record function id": 0, "Ev Idx": 4607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938745142.188, "dur": 2.765, + "args": { + "External id": 982017,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938745143.089, "dur": 1.348, + "args": { + "External id": 982018,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938745143.535, "dur": 0.773, + "args": { + "External id": 982019,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938745151.153, "dur": 67454.395, + "args": { + "External id": 982020,"Record function id": 0, "Sequence number": 10552257, "Fwd thread id": 1, "Ev Idx": 4611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938745153.328, "dur": 67442.750, + "args": { + "External id": 982021,"Sequence number": 10552257, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4612 + } + }, + { + "ph": "f", "id": 209, "pid": 2338710, "tid": 2379450, "ts": 6345938745153.328, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.7)", "pid": 2338710, "tid": 2379450, + "ts": 6345938745190.817, "dur": 47.191, + "args": { + "External id": 982022,"Record function id": 0, "Ev Idx": 4613 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.7)", "pid": 2338710, "tid": 2379450, + "ts": 6345938745247.679, "dur": 73.383, + "args": { + "External id": 982023,"Record function id": 0, "Ev Idx": 4614 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.7)", "pid": 2338710, "tid": 2379450, + "ts": 6345938745327.966, "dur": 67258.051, + "args": { + "External id": 982024,"Record function id": 0, "Ev Idx": 4615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938745440.063, "dur": 9.535, + "args": { + "External id": 982025,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938745461.010, "dur": 7.558, + "args": { + "External id": 982026,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345938745487.908, "dur": 65989.749, + "args": { + "External id": 982027,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345938745503.892, "dur": 65956.886, + "args": { + "External id": 982028,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938745606.932, "dur": 21.073, + "args": { + "External id": 982029,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345938745653.203, "dur": 65750.594, + "args": { + "External id": 982030,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 4621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938745657.643, "dur": 65744.862, + "args": { + "External id": 982031,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 4622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938745663.027, "dur": 12.323, + "args": { + "External id": 982032,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938745678.156, "dur": 65717.948, + "args": { + "External id": 982033,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 4624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938811620.544, "dur": 16.432, + "args": { + "External id": 982034,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 4625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938811625.750, "dur": 10.727, + "args": { + "External id": 982035,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345938811677.751, "dur": 524.647, + "args": { + "External id": 982036,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 4627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938811723.131, "dur": 471.332, + "args": { + "External id": 982037,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4628, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345938811741.230, "dur": 443.055, + "args": { + "External id": 982038,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 4629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938812238.018, "dur": 4.166, + "args": { + "External id": 982039,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4630, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938812332.978, "dur": 9.224, + "args": { + "External id": 982040,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938812398.593, "dur": 1.747, + "args": { + "External id": 982041,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938812418.173, "dur": 4.300, + "args": { + "External id": 982042,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938812435.513, "dur": 1.311, + "args": { + "External id": 982043,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938812453.422, "dur": 0.947, + "args": { + "External id": 982044,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938812467.192, "dur": 0.987, + "args": { + "External id": 982045,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938812480.026, "dur": 3.486, + "args": { + "External id": 982046,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938812496.546, "dur": 2.828, + "args": { + "External id": 982047,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938812511.387, "dur": 0.907, + "args": { + "External id": 982048,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938812623.909, "dur": 3416.153, + "args": { + "External id": 982049,"Record function id": 0, "Ev Idx": 4640 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.6)", "pid": 2338710, "tid": 2379450, + "ts": 6345938812650.355, "dur": 1281.491, + "args": { + "External id": 982050,"Record function id": 0, "Ev Idx": 4641 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.6)", "pid": 2338710, "tid": 2379450, + "ts": 6345938812670.754, "dur": 458.541, + "args": { + "External id": 982051,"Record function id": 0, "Ev Idx": 4642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938812763.317, "dur": 6.143, + "args": { + "External id": 982052,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938812773.111, "dur": 0.982, + "args": { + "External id": 982053,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938812776.114, "dur": 3.250, + "args": { + "External id": 982054,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938812781.401, "dur": 0.826, + "args": { + "External id": 982055,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938812784.012, "dur": 0.823, + "args": { + "External id": 982056,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938812786.482, "dur": 0.839, + "args": { + "External id": 982057,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938812789.162, "dur": 2.024, + "args": { + "External id": 982058,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938812794.711, "dur": 0.954, + "args": { + "External id": 982059,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938812797.277, "dur": 0.945, + "args": { + "External id": 982060,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938812799.728, "dur": 0.762, + "args": { + "External id": 982061,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938812821.055, "dur": 220.978, + "args": { + "External id": 982062,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938812841.285, "dur": 192.958, + "args": { + "External id": 982063,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938812863.835, "dur": 21.932, + "args": { + "External id": 982064,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345938812891.017, "dur": 86.011, + "args": { + "External id": 982065,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 4656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938812894.138, "dur": 82.520, + "args": { + "External id": 982066,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 4657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938812899.221, "dur": 6.581, + "args": { + "External id": 982067,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938812907.809, "dur": 67.972, + "args": { + "External id": 982068,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 4659 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.5", "pid": 2338710, "tid": 2379450, + "ts": 6345938813231.539, "dur": 691.245, + "args": { + "External id": 982069,"Record function id": 0, "Ev Idx": 4660 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.5)", "pid": 2338710, "tid": 2379450, + "ts": 6345938813253.720, "dur": 655.448, + "args": { + "External id": 982070,"Record function id": 0, "Ev Idx": 4661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938813321.520, "dur": 7.671, + "args": { + "External id": 982071,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345938813348.902, "dur": 39.904, + "args": { + "External id": 982072,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938813355.068, "dur": 3.858, + "args": { + "External id": 982073,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938813361.382, "dur": 0.568, + "args": { + "External id": 982074,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938813364.022, "dur": 0.763, + "args": { + "External id": 982075,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938813367.830, "dur": 0.517, + "args": { + "External id": 982076,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938813370.267, "dur": 0.489, + "args": { + "External id": 982077,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938813372.295, "dur": 2.990, + "args": { + "External id": 982078,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938813377.514, "dur": 0.422, + "args": { + "External id": 982079,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938813379.460, "dur": 0.370, + "args": { + "External id": 982080,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938813381.653, "dur": 1.487, + "args": { + "External id": 982081,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938813399.782, "dur": 57.400, + "args": { + "External id": 982082,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345938813494.426, "dur": 136.267, + "args": { + "External id": 982083,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 4674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938813507.740, "dur": 5.100, + "args": { + "External id": 982084,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345938813519.038, "dur": 11.502, + "args": { + "External id": 982085,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345938813523.836, "dur": 6.202, + "args": { + "External id": 982086,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 4677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938813527.990, "dur": 0.769, + "args": { + "External id": 982087,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345938813538.998, "dur": 31.632, + "args": { + "External id": 982088,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938813541.606, "dur": 0.577, + "args": { + "External id": 982089,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938813544.418, "dur": 0.499, + "args": { + "External id": 982090,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938813546.691, "dur": 3.627, + "args": { + "External id": 982091,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938813551.903, "dur": 0.540, + "args": { + "External id": 982092,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938813554.205, "dur": 0.539, + "args": { + "External id": 982093,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938813558.403, "dur": 0.552, + "args": { + "External id": 982094,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938813560.592, "dur": 0.386, + "args": { + "External id": 982095,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938813562.498, "dur": 0.492, + "args": { + "External id": 982096,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938813565.689, "dur": 0.485, + "args": { + "External id": 982097,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938813584.692, "dur": 37.468, + "args": { + "External id": 982098,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345938813680.744, "dur": 148.200, + "args": { + "External id": 982099,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 4690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938813716.165, "dur": 108.866, + "args": { + "External id": 982100,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4691, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345938813728.471, "dur": 91.349, + "args": { + "External id": 982101,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 4692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938813849.364, "dur": 2.185, + "args": { + "External id": 982102,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4693, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938813939.840, "dur": 2057.456, + "args": { + "External id": 982103,"Sequence number": 10552256, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4694 + } + }, + { + "ph": "f", "id": 210, "pid": 2338710, "tid": 2379450, "ts": 6345938813939.840, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938814146.275, "dur": 131.933, + "args": { + "External id": 982104,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 4695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345938814332.399, "dur": 46.087, + "args": { + "External id": 982105,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 4696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345938814398.180, "dur": 57.745, + "args": { + "External id": 982106,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 4697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938814469.340, "dur": 35.086, + "args": { + "External id": 982107,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938814523.873, "dur": 39.066, + "args": { + "External id": 982108,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938814570.385, "dur": 30.426, + "args": { + "External id": 982109,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938814608.928, "dur": 31.884, + "args": { + "External id": 982110,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345938814672.531, "dur": 27.084, + "args": { + "External id": 982111,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 4702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345938814725.795, "dur": 32.996, + "args": { + "External id": 982112,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345938814784.714, "dur": 23.953, + "args": { + "External id": 982113,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345938814823.561, "dur": 15.605, + "args": { + "External id": 982114,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938814849.141, "dur": 40.744, + "args": { + "External id": 982115,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938814893.500, "dur": 35.302, + "args": { + "External id": 982116,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345938814960.421, "dur": 382.047, + "args": { + "External id": 982117,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938815114.465, "dur": 9.568, + "args": { + "External id": 982118,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938815126.809, "dur": 4.322, + "args": { + "External id": 982119,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938815132.941, "dur": 4.429, + "args": { + "External id": 982120,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938815139.031, "dur": 2.399, + "args": { + "External id": 982121,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345938815207.906, "dur": 9.445, + "args": { + "External id": 982122,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938815212.764, "dur": 3.747, + "args": { + "External id": 982123,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345938815219.551, "dur": 39.733, + "args": { + "External id": 982124,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938815226.566, "dur": 4.309, + "args": { + "External id": 982125,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345938815260.787, "dur": 1.828, + "args": { + "External id": 982126,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938815261.869, "dur": 0.653, + "args": { + "External id": 982127,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345938815263.810, "dur": 17.985, + "args": { + "External id": 982128,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938815268.443, "dur": 0.640, + "args": { + "External id": 982129,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345938815392.063, "dur": 38.727, + "args": { + "External id": 982130,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345938815454.201, "dur": 17.975, + "args": { + "External id": 982131,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938815480.879, "dur": 63.187, + "args": { + "External id": 982132,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938815552.227, "dur": 49.485, + "args": { + "External id": 982133,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938815612.695, "dur": 23.749, + "args": { + "External id": 982134,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938815642.520, "dur": 37.143, + "args": { + "External id": 982135,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938815686.623, "dur": 29.689, + "args": { + "External id": 982136,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938815727.940, "dur": 32.428, + "args": { + "External id": 982137,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345938815784.929, "dur": 26.209, + "args": { + "External id": 982138,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 4729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345938815830.115, "dur": 25.833, + "args": { + "External id": 982139,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345938815875.180, "dur": 18.858, + "args": { + "External id": 982140,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345938815913.858, "dur": 15.473, + "args": { + "External id": 982141,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345938815946.275, "dur": 17.028, + "args": { + "External id": 982142,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 4733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938816107.936, "dur": 20.702, + "args": { + "External id": 982143,"Record function id": 0, "Ev Idx": 4734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938816112.758, "dur": 14.092, + "args": { + "External id": 982144,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938816117.947, "dur": 7.075, + "args": { + "External id": 982145,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938816119.854, "dur": 4.922, + "args": { + "External id": 982146,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938816133.491, "dur": 4.963, + "args": { + "External id": 982147,"Record function id": 0, "Ev Idx": 4738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938816135.131, "dur": 2.800, + "args": { + "External id": 982148,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938816135.932, "dur": 1.474, + "args": { + "External id": 982149,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938816136.372, "dur": 0.906, + "args": { + "External id": 982150,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938816142.571, "dur": 5.040, + "args": { + "External id": 982151,"Record function id": 0, "Ev Idx": 4742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938816143.984, "dur": 3.117, + "args": { + "External id": 982152,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938816144.653, "dur": 1.964, + "args": { + "External id": 982153,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938816145.427, "dur": 1.064, + "args": { + "External id": 982154,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938816151.399, "dur": 6.979, + "args": { + "External id": 982155,"Record function id": 0, "Ev Idx": 4746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938816152.742, "dur": 5.134, + "args": { + "External id": 982156,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938816153.403, "dur": 3.993, + "args": { + "External id": 982157,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938816153.732, "dur": 3.546, + "args": { + "External id": 982158,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938816162.063, "dur": 4.375, + "args": { + "External id": 982159,"Record function id": 0, "Ev Idx": 4750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938816163.491, "dur": 2.460, + "args": { + "External id": 982160,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938816164.131, "dur": 1.278, + "args": { + "External id": 982161,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938816164.525, "dur": 0.772, + "args": { + "External id": 982162,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938816170.212, "dur": 4.429, + "args": { + "External id": 982163,"Record function id": 0, "Ev Idx": 4754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938816171.613, "dur": 2.547, + "args": { + "External id": 982164,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938816172.262, "dur": 1.398, + "args": { + "External id": 982165,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938816172.923, "dur": 0.623, + "args": { + "External id": 982166,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938816178.410, "dur": 6.715, + "args": { + "External id": 982167,"Record function id": 0, "Ev Idx": 4758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938816179.747, "dur": 4.901, + "args": { + "External id": 982168,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938816180.516, "dur": 3.629, + "args": { + "External id": 982169,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938816183.209, "dur": 0.836, + "args": { + "External id": 982170,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938816188.819, "dur": 4.567, + "args": { + "External id": 982171,"Record function id": 0, "Ev Idx": 4762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938816190.202, "dur": 2.710, + "args": { + "External id": 982172,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938816190.871, "dur": 1.517, + "args": { + "External id": 982173,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938816191.364, "dur": 0.896, + "args": { + "External id": 982174,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938816197.522, "dur": 4.026, + "args": { + "External id": 982175,"Record function id": 0, "Ev Idx": 4766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938816198.827, "dur": 2.240, + "args": { + "External id": 982176,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938816199.412, "dur": 1.186, + "args": { + "External id": 982177,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938816199.747, "dur": 0.707, + "args": { + "External id": 982178,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938816206.610, "dur": 64168.301, + "args": { + "External id": 982179,"Record function id": 0, "Sequence number": 10552255, "Fwd thread id": 1, "Ev Idx": 4770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938816208.468, "dur": 64154.190, + "args": { + "External id": 982180,"Sequence number": 10552255, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4771 + } + }, + { + "ph": "f", "id": 211, "pid": 2338710, "tid": 2379450, "ts": 6345938816208.468, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.6)", "pid": 2338710, "tid": 2379450, + "ts": 6345938816245.747, "dur": 47.654, + "args": { + "External id": 982181,"Record function id": 0, "Ev Idx": 4772 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.6)", "pid": 2338710, "tid": 2379450, + "ts": 6345938816302.872, "dur": 74.807, + "args": { + "External id": 982182,"Record function id": 0, "Ev Idx": 4773 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.6)", "pid": 2338710, "tid": 2379450, + "ts": 6345938816384.784, "dur": 63966.843, + "args": { + "External id": 982183,"Record function id": 0, "Ev Idx": 4774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938816492.856, "dur": 8.776, + "args": { + "External id": 982184,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938816513.246, "dur": 5.126, + "args": { + "External id": 982185,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345938816539.284, "dur": 62716.305, + "args": { + "External id": 982186,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345938816558.057, "dur": 62680.737, + "args": { + "External id": 982187,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938816660.659, "dur": 25.074, + "args": { + "External id": 982188,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345938816708.476, "dur": 62471.527, + "args": { + "External id": 982189,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 4780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938816712.301, "dur": 62466.245, + "args": { + "External id": 982190,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 4781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938816718.146, "dur": 13.041, + "args": { + "External id": 982191,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938816733.492, "dur": 62438.273, + "args": { + "External id": 982192,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 4783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938879394.109, "dur": 16.158, + "args": { + "External id": 982193,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 4784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938879399.210, "dur": 10.541, + "args": { + "External id": 982194,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345938879450.078, "dur": 482.248, + "args": { + "External id": 982195,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 4786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938879493.722, "dur": 432.640, + "args": { + "External id": 982196,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4787, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345938879511.634, "dur": 407.586, + "args": { + "External id": 982197,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 4788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938879957.667, "dur": 2.709, + "args": { + "External id": 982198,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4789, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938880092.400, "dur": 9.316, + "args": { + "External id": 982199,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938880162.175, "dur": 2.838, + "args": { + "External id": 982200,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938880184.318, "dur": 1.896, + "args": { + "External id": 982201,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938880204.307, "dur": 0.949, + "args": { + "External id": 982202,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938880222.623, "dur": 0.950, + "args": { + "External id": 982203,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938880236.559, "dur": 0.964, + "args": { + "External id": 982204,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938880248.799, "dur": 0.947, + "args": { + "External id": 982205,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938880263.029, "dur": 2.299, + "args": { + "External id": 982206,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938880275.524, "dur": 0.656, + "args": { + "External id": 982207,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938880395.801, "dur": 3461.960, + "args": { + "External id": 982208,"Record function id": 0, "Ev Idx": 4799 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.5)", "pid": 2338710, "tid": 2379450, + "ts": 6345938880421.234, "dur": 1284.595, + "args": { + "External id": 982209,"Record function id": 0, "Ev Idx": 4800 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.5)", "pid": 2338710, "tid": 2379450, + "ts": 6345938880439.726, "dur": 395.252, + "args": { + "External id": 982210,"Record function id": 0, "Ev Idx": 4801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938880543.091, "dur": 6.100, + "args": { + "External id": 982211,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938880553.132, "dur": 0.839, + "args": { + "External id": 982212,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938880555.782, "dur": 0.970, + "args": { + "External id": 982213,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938880558.983, "dur": 3.221, + "args": { + "External id": 982214,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938880563.797, "dur": 0.758, + "args": { + "External id": 982215,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938880566.357, "dur": 1.090, + "args": { + "External id": 982216,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938880569.170, "dur": 2.047, + "args": { + "External id": 982217,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938880575.202, "dur": 0.795, + "args": { + "External id": 982218,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938880577.613, "dur": 0.634, + "args": { + "External id": 982219,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938880579.721, "dur": 0.664, + "args": { + "External id": 982220,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938880604.330, "dur": 195.192, + "args": { + "External id": 982221,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938880625.063, "dur": 168.904, + "args": { + "External id": 982222,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938880648.550, "dur": 21.112, + "args": { + "External id": 982223,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345938880675.305, "dur": 84.474, + "args": { + "External id": 982224,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 4815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938880678.448, "dur": 80.870, + "args": { + "External id": 982225,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 4816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938880684.658, "dur": 10.547, + "args": { + "External id": 982226,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938880698.419, "dur": 60.218, + "args": { + "External id": 982227,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 4818 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.4", "pid": 2338710, "tid": 2379450, + "ts": 6345938880927.444, "dur": 769.514, + "args": { + "External id": 982228,"Record function id": 0, "Ev Idx": 4819 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.4)", "pid": 2338710, "tid": 2379450, + "ts": 6345938880947.230, "dur": 735.571, + "args": { + "External id": 982229,"Record function id": 0, "Ev Idx": 4820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938881034.093, "dur": 8.504, + "args": { + "External id": 982230,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345938881103.898, "dur": 39.502, + "args": { + "External id": 982231,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938881110.150, "dur": 3.420, + "args": { + "External id": 982232,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938881116.028, "dur": 0.549, + "args": { + "External id": 982233,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938881118.493, "dur": 0.444, + "args": { + "External id": 982234,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938881121.509, "dur": 0.554, + "args": { + "External id": 982235,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938881123.558, "dur": 0.619, + "args": { + "External id": 982236,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938881125.843, "dur": 0.346, + "args": { + "External id": 982237,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938881128.607, "dur": 2.915, + "args": { + "External id": 982238,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938881133.074, "dur": 0.381, + "args": { + "External id": 982239,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938881135.377, "dur": 2.064, + "args": { + "External id": 982240,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938881156.098, "dur": 61.190, + "args": { + "External id": 982241,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345938881257.350, "dur": 134.707, + "args": { + "External id": 982242,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 4833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938881271.632, "dur": 5.176, + "args": { + "External id": 982243,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345938881282.499, "dur": 11.878, + "args": { + "External id": 982244,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345938881287.368, "dur": 6.334, + "args": { + "External id": 982245,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 4836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938881291.562, "dur": 0.694, + "args": { + "External id": 982246,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345938881302.638, "dur": 31.811, + "args": { + "External id": 982247,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938881305.542, "dur": 0.620, + "args": { + "External id": 982248,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938881308.050, "dur": 0.387, + "args": { + "External id": 982249,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938881309.924, "dur": 2.185, + "args": { + "External id": 982250,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938881313.586, "dur": 2.966, + "args": { + "External id": 982251,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938881318.777, "dur": 0.540, + "args": { + "External id": 982252,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938881322.274, "dur": 0.648, + "args": { + "External id": 982253,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938881324.504, "dur": 0.592, + "args": { + "External id": 982254,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938881326.532, "dur": 0.298, + "args": { + "External id": 982255,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938881330.068, "dur": 0.441, + "args": { + "External id": 982256,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938881346.252, "dur": 36.879, + "args": { + "External id": 982257,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345938881447.827, "dur": 151.598, + "args": { + "External id": 982258,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 4849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938881483.229, "dur": 111.968, + "args": { + "External id": 982259,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4850, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345938881496.613, "dur": 92.285, + "args": { + "External id": 982260,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 4851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938881620.227, "dur": 2.106, + "args": { + "External id": 982261,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4852, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938881713.772, "dur": 2116.656, + "args": { + "External id": 982262,"Sequence number": 10552254, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4853 + } + }, + { + "ph": "f", "id": 212, "pid": 2338710, "tid": 2379450, "ts": 6345938881713.772, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938881838.751, "dur": 125.908, + "args": { + "External id": 982263,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 4854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345938882112.353, "dur": 52.557, + "args": { + "External id": 982264,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 4855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345938882194.147, "dur": 76.129, + "args": { + "External id": 982265,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 4856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938882284.024, "dur": 37.936, + "args": { + "External id": 982266,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938882332.059, "dur": 37.565, + "args": { + "External id": 982267,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938882377.130, "dur": 31.239, + "args": { + "External id": 982268,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938882418.368, "dur": 32.573, + "args": { + "External id": 982269,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345938882485.663, "dur": 26.201, + "args": { + "External id": 982270,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 4861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345938882536.314, "dur": 33.528, + "args": { + "External id": 982271,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345938882595.894, "dur": 24.259, + "args": { + "External id": 982272,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345938882636.791, "dur": 17.623, + "args": { + "External id": 982273,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938882662.888, "dur": 43.378, + "args": { + "External id": 982274,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938882709.887, "dur": 36.842, + "args": { + "External id": 982275,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345938882780.997, "dur": 387.601, + "args": { + "External id": 982276,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938882887.239, "dur": 9.562, + "args": { + "External id": 982277,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938882899.523, "dur": 3.356, + "args": { + "External id": 982278,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938882904.382, "dur": 2.932, + "args": { + "External id": 982279,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938882908.738, "dur": 3.877, + "args": { + "External id": 982280,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345938882964.716, "dur": 8.248, + "args": { + "External id": 982281,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938882969.288, "dur": 3.460, + "args": { + "External id": 982282,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345938882979.074, "dur": 59.813, + "args": { + "External id": 982283,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938882985.269, "dur": 2.229, + "args": { + "External id": 982284,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345938883045.602, "dur": 2.487, + "args": { + "External id": 982285,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938883047.054, "dur": 0.899, + "args": { + "External id": 982286,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345938883049.196, "dur": 63.047, + "args": { + "External id": 982287,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938883088.553, "dur": 3.595, + "args": { + "External id": 982288,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345938883215.172, "dur": 34.371, + "args": { + "External id": 982289,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345938883271.883, "dur": 19.445, + "args": { + "External id": 982290,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938883300.143, "dur": 60.767, + "args": { + "External id": 982291,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938883368.466, "dur": 45.743, + "args": { + "External id": 982292,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938883425.490, "dur": 25.996, + "args": { + "External id": 982293,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938883457.663, "dur": 34.741, + "args": { + "External id": 982294,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938883499.819, "dur": 30.722, + "args": { + "External id": 982295,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938883537.457, "dur": 33.667, + "args": { + "External id": 982296,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345938883599.608, "dur": 27.143, + "args": { + "External id": 982297,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 4888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345938883645.406, "dur": 33.110, + "args": { + "External id": 982298,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345938883696.562, "dur": 20.189, + "args": { + "External id": 982299,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345938883738.643, "dur": 17.597, + "args": { + "External id": 982300,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345938883775.689, "dur": 21.187, + "args": { + "External id": 982301,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 4892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938883884.151, "dur": 17.221, + "args": { + "External id": 982302,"Record function id": 0, "Ev Idx": 4893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938883887.888, "dur": 12.314, + "args": { + "External id": 982303,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938883892.776, "dur": 6.361, + "args": { + "External id": 982304,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938883894.625, "dur": 4.402, + "args": { + "External id": 982305,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938883906.105, "dur": 6.087, + "args": { + "External id": 982306,"Record function id": 0, "Ev Idx": 4897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938883908.002, "dur": 3.610, + "args": { + "External id": 982307,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938883908.867, "dur": 2.143, + "args": { + "External id": 982308,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938883909.946, "dur": 0.960, + "args": { + "External id": 982309,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938883919.255, "dur": 4.828, + "args": { + "External id": 982310,"Record function id": 0, "Ev Idx": 4901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938883920.625, "dur": 2.934, + "args": { + "External id": 982311,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938883921.238, "dur": 1.776, + "args": { + "External id": 982312,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938883921.941, "dur": 0.952, + "args": { + "External id": 982313,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938883927.780, "dur": 6.824, + "args": { + "External id": 982314,"Record function id": 0, "Ev Idx": 4905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938883929.249, "dur": 4.864, + "args": { + "External id": 982315,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938883930.212, "dur": 3.432, + "args": { + "External id": 982316,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938883930.553, "dur": 3.019, + "args": { + "External id": 982317,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938883938.248, "dur": 4.765, + "args": { + "External id": 982318,"Record function id": 0, "Ev Idx": 4909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938883939.753, "dur": 2.785, + "args": { + "External id": 982319,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938883940.508, "dur": 1.535, + "args": { + "External id": 982320,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938883941.159, "dur": 0.788, + "args": { + "External id": 982321,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938883946.838, "dur": 5.399, + "args": { + "External id": 982322,"Record function id": 0, "Ev Idx": 4913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938883948.453, "dur": 3.280, + "args": { + "External id": 982323,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938883949.232, "dur": 2.027, + "args": { + "External id": 982324,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938883950.050, "dur": 1.132, + "args": { + "External id": 982325,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938883955.975, "dur": 4.177, + "args": { + "External id": 982326,"Record function id": 0, "Ev Idx": 4917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938883957.389, "dur": 2.274, + "args": { + "External id": 982327,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938883957.992, "dur": 1.189, + "args": { + "External id": 982328,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938883958.345, "dur": 0.757, + "args": { + "External id": 982329,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938883963.788, "dur": 7.189, + "args": { + "External id": 982330,"Record function id": 0, "Ev Idx": 4921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938883965.361, "dur": 5.137, + "args": { + "External id": 982331,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938883966.300, "dur": 3.669, + "args": { + "External id": 982332,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938883969.175, "dur": 0.718, + "args": { + "External id": 982333,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938883974.545, "dur": 4.708, + "args": { + "External id": 982334,"Record function id": 0, "Ev Idx": 4925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938883976.033, "dur": 2.720, + "args": { + "External id": 982335,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938883976.745, "dur": 1.519, + "args": { + "External id": 982336,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938883977.473, "dur": 0.715, + "args": { + "External id": 982337,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938883984.389, "dur": 60321.702, + "args": { + "External id": 982338,"Record function id": 0, "Sequence number": 10552253, "Fwd thread id": 1, "Ev Idx": 4929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938883986.665, "dur": 60308.061, + "args": { + "External id": 982339,"Sequence number": 10552253, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4930 + } + }, + { + "ph": "f", "id": 213, "pid": 2338710, "tid": 2379450, "ts": 6345938883986.665, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.5)", "pid": 2338710, "tid": 2379450, + "ts": 6345938884043.883, "dur": 86.491, + "args": { + "External id": 982340,"Record function id": 0, "Ev Idx": 4931 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.5)", "pid": 2338710, "tid": 2379450, + "ts": 6345938884141.301, "dur": 74.235, + "args": { + "External id": 982341,"Record function id": 0, "Ev Idx": 4932 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.5)", "pid": 2338710, "tid": 2379450, + "ts": 6345938884222.661, "dur": 60061.493, + "args": { + "External id": 982342,"Record function id": 0, "Ev Idx": 4933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938884329.968, "dur": 9.445, + "args": { + "External id": 982343,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938884351.953, "dur": 6.183, + "args": { + "External id": 982344,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345938884376.769, "dur": 58893.334, + "args": { + "External id": 982345,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345938884392.604, "dur": 58861.706, + "args": { + "External id": 982346,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938884492.894, "dur": 24.512, + "args": { + "External id": 982347,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345938884541.488, "dur": 58654.170, + "args": { + "External id": 982348,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 4939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938884548.837, "dur": 58645.521, + "args": { + "External id": 982349,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 4940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938884555.521, "dur": 11.567, + "args": { + "External id": 982350,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938884569.552, "dur": 58618.384, + "args": { + "External id": 982351,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 4942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938943397.601, "dur": 14.595, + "args": { + "External id": 982352,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 4943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938943401.792, "dur": 9.811, + "args": { + "External id": 982353,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345938943449.861, "dur": 424.480, + "args": { + "External id": 982354,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 4945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938943488.894, "dur": 379.713, + "args": { + "External id": 982355,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4946, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345938943504.864, "dur": 356.904, + "args": { + "External id": 982356,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 4947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938943900.035, "dur": 2.997, + "args": { + "External id": 982357,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4948, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938943969.825, "dur": 9.220, + "args": { + "External id": 982358,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938944094.012, "dur": 4.034, + "args": { + "External id": 982359,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938944120.165, "dur": 3.033, + "args": { + "External id": 982360,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938944138.243, "dur": 1.343, + "args": { + "External id": 982361,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938944152.422, "dur": 1.155, + "args": { + "External id": 982362,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938944165.029, "dur": 0.879, + "args": { + "External id": 982363,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938944179.375, "dur": 0.983, + "args": { + "External id": 982364,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938944194.796, "dur": 2.462, + "args": { + "External id": 982365,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938944207.352, "dur": 0.783, + "args": { + "External id": 982366,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938944328.922, "dur": 3436.706, + "args": { + "External id": 982367,"Record function id": 0, "Ev Idx": 4958 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.4)", "pid": 2338710, "tid": 2379450, + "ts": 6345938944353.278, "dur": 1287.083, + "args": { + "External id": 982368,"Record function id": 0, "Ev Idx": 4959 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.4)", "pid": 2338710, "tid": 2379450, + "ts": 6345938944372.786, "dur": 399.251, + "args": { + "External id": 982369,"Record function id": 0, "Ev Idx": 4960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938944475.715, "dur": 5.597, + "args": { + "External id": 982370,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938944484.871, "dur": 0.945, + "args": { + "External id": 982371,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938944487.929, "dur": 0.989, + "args": { + "External id": 982372,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938944492.857, "dur": 2.742, + "args": { + "External id": 982373,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938944497.330, "dur": 0.860, + "args": { + "External id": 982374,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938944500.267, "dur": 0.588, + "args": { + "External id": 982375,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938944502.535, "dur": 2.308, + "args": { + "External id": 982376,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938944509.130, "dur": 0.702, + "args": { + "External id": 982377,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938944511.339, "dur": 0.807, + "args": { + "External id": 982378,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938944513.788, "dur": 0.622, + "args": { + "External id": 982379,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938944534.597, "dur": 201.809, + "args": { + "External id": 982380,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938944554.812, "dur": 174.979, + "args": { + "External id": 982381,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938944580.125, "dur": 20.559, + "args": { + "External id": 982382,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345938944608.321, "dur": 84.531, + "args": { + "External id": 982383,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 4974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938944612.560, "dur": 79.897, + "args": { + "External id": 982384,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 4975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938944617.627, "dur": 8.779, + "args": { + "External id": 982385,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938944629.637, "dur": 62.001, + "args": { + "External id": 982386,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 4977 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.3", "pid": 2338710, "tid": 2379450, + "ts": 6345938944857.587, "dur": 773.746, + "args": { + "External id": 982387,"Record function id": 0, "Ev Idx": 4978 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.3)", "pid": 2338710, "tid": 2379450, + "ts": 6345938944875.398, "dur": 742.002, + "args": { + "External id": 982388,"Record function id": 0, "Ev Idx": 4979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938944936.984, "dur": 6.670, + "args": { + "External id": 982389,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345938944961.425, "dur": 41.806, + "args": { + "External id": 982390,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938944967.411, "dur": 3.095, + "args": { + "External id": 982391,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938944972.708, "dur": 0.585, + "args": { + "External id": 982392,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938944974.937, "dur": 0.455, + "args": { + "External id": 982393,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938944979.926, "dur": 0.441, + "args": { + "External id": 982394,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938944982.069, "dur": 0.444, + "args": { + "External id": 982395,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938944984.143, "dur": 0.612, + "args": { + "External id": 982396,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938944988.745, "dur": 3.038, + "args": { + "External id": 982397,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938944993.578, "dur": 0.428, + "args": { + "External id": 982398,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938944995.495, "dur": 2.363, + "args": { + "External id": 982399,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938945035.721, "dur": 99.182, + "args": { + "External id": 982400,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345938945179.005, "dur": 143.221, + "args": { + "External id": 982401,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 4992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938945193.769, "dur": 5.803, + "args": { + "External id": 982402,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345938945206.301, "dur": 12.577, + "args": { + "External id": 982403,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345938945211.043, "dur": 7.353, + "args": { + "External id": 982404,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 4995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938945215.554, "dur": 0.842, + "args": { + "External id": 982405,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345938945228.160, "dur": 30.672, + "args": { + "External id": 982406,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938945230.847, "dur": 0.384, + "args": { + "External id": 982407,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938945233.197, "dur": 0.320, + "args": { + "External id": 982408,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938945235.078, "dur": 2.069, + "args": { + "External id": 982409,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938945238.680, "dur": 2.751, + "args": { + "External id": 982410,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938945242.777, "dur": 0.469, + "args": { + "External id": 982411,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938945246.618, "dur": 0.472, + "args": { + "External id": 982412,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938945248.900, "dur": 0.367, + "args": { + "External id": 982413,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938945250.960, "dur": 0.508, + "args": { + "External id": 982414,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938945254.066, "dur": 0.450, + "args": { + "External id": 982415,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938945274.114, "dur": 39.333, + "args": { + "External id": 982416,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 5007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345938945376.940, "dur": 151.426, + "args": { + "External id": 982417,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 5008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938945412.613, "dur": 111.673, + "args": { + "External id": 982418,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5009, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345938945423.639, "dur": 94.813, + "args": { + "External id": 982419,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 5010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345938945551.151, "dur": 2.132, + "args": { + "External id": 982420,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5011, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938945648.757, "dur": 2093.557, + "args": { + "External id": 982421,"Sequence number": 10552252, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5012 + } + }, + { + "ph": "f", "id": 214, "pid": 2338710, "tid": 2379450, "ts": 6345938945648.757, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938945777.249, "dur": 130.499, + "args": { + "External id": 982422,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 5013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345938945960.875, "dur": 46.152, + "args": { + "External id": 982423,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 5014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345938946094.860, "dur": 74.953, + "args": { + "External id": 982424,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 5015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938946185.395, "dur": 36.814, + "args": { + "External id": 982425,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938946232.409, "dur": 36.839, + "args": { + "External id": 982426,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938946276.470, "dur": 30.359, + "args": { + "External id": 982427,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938946315.236, "dur": 32.337, + "args": { + "External id": 982428,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345938946380.405, "dur": 29.698, + "args": { + "External id": 982429,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 5020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345938946444.332, "dur": 34.200, + "args": { + "External id": 982430,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345938946504.968, "dur": 23.352, + "args": { + "External id": 982431,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345938946544.233, "dur": 17.310, + "args": { + "External id": 982432,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938946569.936, "dur": 42.143, + "args": { + "External id": 982433,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938946616.403, "dur": 38.123, + "args": { + "External id": 982434,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345938946691.068, "dur": 350.045, + "args": { + "External id": 982435,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 5026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938946784.410, "dur": 7.310, + "args": { + "External id": 982436,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938946794.190, "dur": 2.885, + "args": { + "External id": 982437,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938946798.707, "dur": 2.579, + "args": { + "External id": 982438,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938946802.965, "dur": 2.296, + "args": { + "External id": 982439,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345938946855.666, "dur": 5.831, + "args": { + "External id": 982440,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938946857.890, "dur": 3.294, + "args": { + "External id": 982441,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345938946866.373, "dur": 54.923, + "args": { + "External id": 982442,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938946873.673, "dur": 2.057, + "args": { + "External id": 982443,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345938946926.778, "dur": 2.990, + "args": { + "External id": 982444,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938946928.254, "dur": 1.337, + "args": { + "External id": 982445,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345938946931.138, "dur": 25.050, + "args": { + "External id": 982446,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938946936.124, "dur": 3.040, + "args": { + "External id": 982447,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345938947134.185, "dur": 35.113, + "args": { + "External id": 982448,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345938947195.900, "dur": 20.551, + "args": { + "External id": 982449,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938947225.448, "dur": 62.656, + "args": { + "External id": 982450,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938947295.767, "dur": 47.623, + "args": { + "External id": 982451,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938947354.483, "dur": 28.262, + "args": { + "External id": 982452,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938947388.685, "dur": 34.675, + "args": { + "External id": 982453,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938947434.504, "dur": 30.542, + "args": { + "External id": 982454,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345938947471.665, "dur": 33.834, + "args": { + "External id": 982455,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345938947528.461, "dur": 24.572, + "args": { + "External id": 982456,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 5047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345938947573.578, "dur": 24.566, + "args": { + "External id": 982457,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345938947618.233, "dur": 18.630, + "args": { + "External id": 982458,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345938947656.508, "dur": 14.952, + "args": { + "External id": 982459,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345938947688.773, "dur": 21.351, + "args": { + "External id": 982460,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 5051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938947791.853, "dur": 17.220, + "args": { + "External id": 982461,"Record function id": 0, "Ev Idx": 5052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938947795.973, "dur": 11.987, + "args": { + "External id": 982462,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938947800.877, "dur": 6.090, + "args": { + "External id": 982463,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938947802.452, "dur": 4.411, + "args": { + "External id": 982464,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938947813.842, "dur": 5.196, + "args": { + "External id": 982465,"Record function id": 0, "Ev Idx": 5056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938947815.489, "dur": 2.999, + "args": { + "External id": 982466,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938947816.368, "dur": 1.565, + "args": { + "External id": 982467,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938947816.973, "dur": 0.874, + "args": { + "External id": 982468,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938947823.030, "dur": 7.257, + "args": { + "External id": 982469,"Record function id": 0, "Ev Idx": 5060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938947824.467, "dur": 5.282, + "args": { + "External id": 982470,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938947825.153, "dur": 4.100, + "args": { + "External id": 982471,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938947825.875, "dur": 3.272, + "args": { + "External id": 982472,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938947834.177, "dur": 4.807, + "args": { + "External id": 982473,"Record function id": 0, "Ev Idx": 5064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938947835.705, "dur": 2.737, + "args": { + "External id": 982474,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938947836.358, "dur": 1.598, + "args": { + "External id": 982475,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938947836.902, "dur": 0.932, + "args": { + "External id": 982476,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938947842.640, "dur": 4.429, + "args": { + "External id": 982477,"Record function id": 0, "Ev Idx": 5068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938947844.248, "dur": 2.300, + "args": { + "External id": 982478,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938947844.879, "dur": 1.182, + "args": { + "External id": 982479,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938947845.330, "dur": 0.655, + "args": { + "External id": 982480,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938947850.760, "dur": 4.873, + "args": { + "External id": 982481,"Record function id": 0, "Ev Idx": 5072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938947852.289, "dur": 2.784, + "args": { + "External id": 982482,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938947853.052, "dur": 1.529, + "args": { + "External id": 982483,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938947853.634, "dur": 0.873, + "args": { + "External id": 982484,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938947859.448, "dur": 4.620, + "args": { + "External id": 982485,"Record function id": 0, "Ev Idx": 5076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938947861.008, "dur": 2.522, + "args": { + "External id": 982486,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938947861.698, "dur": 1.332, + "args": { + "External id": 982487,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938947862.179, "dur": 0.735, + "args": { + "External id": 982488,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938947867.771, "dur": 4.283, + "args": { + "External id": 982489,"Record function id": 0, "Ev Idx": 5080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938947869.307, "dur": 2.246, + "args": { + "External id": 982490,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938947869.871, "dur": 1.209, + "args": { + "External id": 982491,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938947870.213, "dur": 0.789, + "args": { + "External id": 982492,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938947876.088, "dur": 6.552, + "args": { + "External id": 982493,"Record function id": 0, "Ev Idx": 5084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345938947877.605, "dur": 4.502, + "args": { + "External id": 982494,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938947878.253, "dur": 3.384, + "args": { + "External id": 982495,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345938947880.814, "dur": 0.747, + "args": { + "External id": 982496,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938947887.544, "dur": 61966.189, + "args": { + "External id": 982497,"Record function id": 0, "Sequence number": 10552251, "Fwd thread id": 1, "Ev Idx": 5088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345938947889.759, "dur": 61953.911, + "args": { + "External id": 982498,"Sequence number": 10552251, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5089 + } + }, + { + "ph": "f", "id": 215, "pid": 2338710, "tid": 2379450, "ts": 6345938947889.759, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.4)", "pid": 2338710, "tid": 2379450, + "ts": 6345938947923.658, "dur": 44.001, + "args": { + "External id": 982499,"Record function id": 0, "Ev Idx": 5090 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.4)", "pid": 2338710, "tid": 2379450, + "ts": 6345938947976.438, "dur": 136.243, + "args": { + "External id": 982500,"Record function id": 0, "Ev Idx": 5091 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.4)", "pid": 2338710, "tid": 2379450, + "ts": 6345938948122.988, "dur": 61710.228, + "args": { + "External id": 982501,"Record function id": 0, "Ev Idx": 5092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938948234.100, "dur": 9.064, + "args": { + "External id": 982502,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345938948257.386, "dur": 8.024, + "args": { + "External id": 982503,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 5094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345938948281.939, "dur": 60453.214, + "args": { + "External id": 982504,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345938948298.397, "dur": 60419.439, + "args": { + "External id": 982505,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345938948401.696, "dur": 22.479, + "args": { + "External id": 982506,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345938948448.725, "dur": 60205.479, + "args": { + "External id": 982507,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 5098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345938948453.180, "dur": 60199.486, + "args": { + "External id": 982508,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 5099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345938948458.744, "dur": 11.923, + "args": { + "External id": 982509,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345938948473.569, "dur": 60172.064, + "args": { + "External id": 982510,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 5101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345939008870.304, "dur": 15.393, + "args": { + "External id": 982511,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 5102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345939008875.288, "dur": 9.977, + "args": { + "External id": 982512,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345939008921.676, "dur": 554.668, + "args": { + "External id": 982513,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 5104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345939008962.146, "dur": 506.607, + "args": { + "External id": 982514,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5105, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345939008979.147, "dur": 480.925, + "args": { + "External id": 982515,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 5106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345939009509.356, "dur": 2.919, + "args": { + "External id": 982516,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5107, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939009590.809, "dur": 9.313, + "args": { + "External id": 982517,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939009654.437, "dur": 1.537, + "args": { + "External id": 982518,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939009673.869, "dur": 5.131, + "args": { + "External id": 982519,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939009690.946, "dur": 0.877, + "args": { + "External id": 982520,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939009704.823, "dur": 0.919, + "args": { + "External id": 982521,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939009717.040, "dur": 0.911, + "args": { + "External id": 982522,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939009732.141, "dur": 4.323, + "args": { + "External id": 982523,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939009748.482, "dur": 2.562, + "args": { + "External id": 982524,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939009761.553, "dur": 0.672, + "args": { + "External id": 982525,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345939009870.723, "dur": 3491.565, + "args": { + "External id": 982526,"Record function id": 0, "Ev Idx": 5117 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.3)", "pid": 2338710, "tid": 2379450, + "ts": 6345939009893.846, "dur": 1376.855, + "args": { + "External id": 982527,"Record function id": 0, "Ev Idx": 5118 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.3)", "pid": 2338710, "tid": 2379450, + "ts": 6345939009910.974, "dur": 497.917, + "args": { + "External id": 982528,"Record function id": 0, "Ev Idx": 5119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939010101.290, "dur": 6.723, + "args": { + "External id": 982529,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 5120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939010115.709, "dur": 0.935, + "args": { + "External id": 982530,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 5121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939010118.725, "dur": 3.278, + "args": { + "External id": 982531,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 5122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939010126.452, "dur": 0.672, + "args": { + "External id": 982532,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939010128.724, "dur": 0.769, + "args": { + "External id": 982533,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939010131.170, "dur": 0.885, + "args": { + "External id": 982534,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 5125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939010133.753, "dur": 1.951, + "args": { + "External id": 982535,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 5126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939010139.738, "dur": 0.684, + "args": { + "External id": 982536,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939010142.015, "dur": 0.581, + "args": { + "External id": 982537,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939010144.269, "dur": 0.831, + "args": { + "External id": 982538,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345939010168.598, "dur": 200.960, + "args": { + "External id": 982539,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 5130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345939010189.510, "dur": 173.888, + "args": { + "External id": 982540,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 5131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345939010212.808, "dur": 19.900, + "args": { + "External id": 982541,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345939010238.217, "dur": 89.666, + "args": { + "External id": 982542,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 5133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345939010241.179, "dur": 86.290, + "args": { + "External id": 982543,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 5134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939010245.839, "dur": 7.339, + "args": { + "External id": 982544,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345939010255.113, "dur": 71.527, + "args": { + "External id": 982545,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 5136 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.2", "pid": 2338710, "tid": 2379450, + "ts": 6345939010502.344, "dur": 759.803, + "args": { + "External id": 982546,"Record function id": 0, "Ev Idx": 5137 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.2)", "pid": 2338710, "tid": 2379450, + "ts": 6345939010522.326, "dur": 725.598, + "args": { + "External id": 982547,"Record function id": 0, "Ev Idx": 5138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345939010590.870, "dur": 7.397, + "args": { + "External id": 982548,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345939010615.822, "dur": 36.817, + "args": { + "External id": 982549,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 5140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939010621.609, "dur": 1.983, + "args": { + "External id": 982550,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939010625.957, "dur": 2.748, + "args": { + "External id": 982551,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939010630.222, "dur": 0.605, + "args": { + "External id": 982552,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939010632.399, "dur": 0.706, + "args": { + "External id": 982553,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939010636.293, "dur": 0.513, + "args": { + "External id": 982554,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939010638.305, "dur": 2.731, + "args": { + "External id": 982555,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939010642.763, "dur": 0.362, + "args": { + "External id": 982556,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939010645.499, "dur": 0.332, + "args": { + "External id": 982557,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939010647.551, "dur": 0.582, + "args": { + "External id": 982558,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345939010664.071, "dur": 53.933, + "args": { + "External id": 982559,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 5150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345939010751.692, "dur": 135.970, + "args": { + "External id": 982560,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 5151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345939010762.466, "dur": 3.733, + "args": { + "External id": 982561,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345939010775.950, "dur": 11.583, + "args": { + "External id": 982562,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 5153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345939010780.724, "dur": 6.328, + "args": { + "External id": 982563,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 5154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939010784.902, "dur": 0.710, + "args": { + "External id": 982564,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 5155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345939010795.607, "dur": 29.550, + "args": { + "External id": 982565,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 5156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939010798.481, "dur": 0.448, + "args": { + "External id": 982566,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939010802.164, "dur": 0.312, + "args": { + "External id": 982567,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939010803.884, "dur": 2.830, + "args": { + "External id": 982568,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939010808.220, "dur": 2.042, + "args": { + "External id": 982569,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939010812.019, "dur": 0.526, + "args": { + "External id": 982570,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939010814.019, "dur": 0.320, + "args": { + "External id": 982571,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939010817.171, "dur": 0.278, + "args": { + "External id": 982572,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939010818.972, "dur": 0.390, + "args": { + "External id": 982573,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939010820.686, "dur": 0.327, + "args": { + "External id": 982574,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345939010844.194, "dur": 34.780, + "args": { + "External id": 982575,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 5166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345939010937.510, "dur": 216.990, + "args": { + "External id": 982576,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 5167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345939010973.318, "dur": 176.351, + "args": { + "External id": 982577,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5168, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345939010985.740, "dur": 158.058, + "args": { + "External id": 982578,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 5169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345939011179.372, "dur": 2.633, + "args": { + "External id": 982579,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5170, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345939011279.090, "dur": 2061.081, + "args": { + "External id": 982580,"Sequence number": 10552250, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5171 + } + }, + { + "ph": "f", "id": 216, "pid": 2338710, "tid": 2379450, "ts": 6345939011279.090, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939011411.114, "dur": 127.885, + "args": { + "External id": 982581,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 5172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345939011591.317, "dur": 46.783, + "args": { + "External id": 982582,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 5173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345939011660.383, "dur": 62.946, + "args": { + "External id": 982583,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 5174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939011737.279, "dur": 34.650, + "args": { + "External id": 982584,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939011778.882, "dur": 36.412, + "args": { + "External id": 982585,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939011822.309, "dur": 31.498, + "args": { + "External id": 982586,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939011861.579, "dur": 31.558, + "args": { + "External id": 982587,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345939011923.291, "dur": 25.765, + "args": { + "External id": 982588,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 5179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345939011969.489, "dur": 33.545, + "args": { + "External id": 982589,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345939012093.498, "dur": 33.870, + "args": { + "External id": 982590,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345939012147.937, "dur": 17.224, + "args": { + "External id": 982591,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939012183.404, "dur": 51.120, + "args": { + "External id": 982592,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939012239.057, "dur": 37.382, + "args": { + "External id": 982593,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345939012311.690, "dur": 319.828, + "args": { + "External id": 982594,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 5185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345939012422.512, "dur": 9.145, + "args": { + "External id": 982595,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345939012434.659, "dur": 3.761, + "args": { + "External id": 982596,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345939012440.343, "dur": 2.302, + "args": { + "External id": 982597,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345939012444.068, "dur": 2.545, + "args": { + "External id": 982598,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345939012503.223, "dur": 5.826, + "args": { + "External id": 982599,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939012505.157, "dur": 3.681, + "args": { + "External id": 982600,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345939012511.439, "dur": 39.075, + "args": { + "External id": 982601,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939012518.060, "dur": 4.930, + "args": { + "External id": 982602,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345939012552.390, "dur": 4.208, + "args": { + "External id": 982603,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939012555.492, "dur": 0.939, + "args": { + "External id": 982604,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345939012558.137, "dur": 22.005, + "args": { + "External id": 982605,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939012562.578, "dur": 4.354, + "args": { + "External id": 982606,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345939012673.398, "dur": 36.665, + "args": { + "External id": 982607,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345939012730.219, "dur": 18.150, + "args": { + "External id": 982608,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939012756.804, "dur": 49.702, + "args": { + "External id": 982609,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939012813.916, "dur": 44.849, + "args": { + "External id": 982610,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939012869.311, "dur": 23.604, + "args": { + "External id": 982611,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939012899.174, "dur": 34.424, + "args": { + "External id": 982612,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939012941.158, "dur": 29.674, + "args": { + "External id": 982613,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939012977.894, "dur": 52.806, + "args": { + "External id": 982614,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345939013103.699, "dur": 36.305, + "args": { + "External id": 982615,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 5206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345939013164.560, "dur": 26.915, + "args": { + "External id": 982616,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345939013212.070, "dur": 20.306, + "args": { + "External id": 982617,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345939013250.771, "dur": 16.411, + "args": { + "External id": 982618,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345939013289.066, "dur": 18.123, + "args": { + "External id": 982619,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 5210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939013388.160, "dur": 17.400, + "args": { + "External id": 982620,"Record function id": 0, "Ev Idx": 5211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939013392.074, "dur": 12.320, + "args": { + "External id": 982621,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939013397.033, "dur": 6.335, + "args": { + "External id": 982622,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939013398.887, "dur": 4.381, + "args": { + "External id": 982623,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939013410.056, "dur": 6.130, + "args": { + "External id": 982624,"Record function id": 0, "Ev Idx": 5215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939013412.332, "dur": 3.355, + "args": { + "External id": 982625,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939013413.154, "dur": 2.066, + "args": { + "External id": 982626,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939013413.935, "dur": 1.165, + "args": { + "External id": 982627,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939013420.171, "dur": 4.855, + "args": { + "External id": 982628,"Record function id": 0, "Ev Idx": 5219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939013421.755, "dur": 2.770, + "args": { + "External id": 982629,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939013422.472, "dur": 1.590, + "args": { + "External id": 982630,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939013423.304, "dur": 0.635, + "args": { + "External id": 982631,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939013428.880, "dur": 6.542, + "args": { + "External id": 982632,"Record function id": 0, "Ev Idx": 5223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939013430.259, "dur": 4.693, + "args": { + "External id": 982633,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939013430.860, "dur": 3.650, + "args": { + "External id": 982634,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939013431.232, "dur": 3.205, + "args": { + "External id": 982635,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939013439.077, "dur": 4.055, + "args": { + "External id": 982636,"Record function id": 0, "Ev Idx": 5227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939013440.474, "dur": 2.162, + "args": { + "External id": 982637,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939013441.030, "dur": 1.130, + "args": { + "External id": 982638,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939013441.367, "dur": 0.680, + "args": { + "External id": 982639,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939013446.919, "dur": 4.928, + "args": { + "External id": 982640,"Record function id": 0, "Ev Idx": 5231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939013448.521, "dur": 2.806, + "args": { + "External id": 982641,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939013449.139, "dur": 1.755, + "args": { + "External id": 982642,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939013449.995, "dur": 0.824, + "args": { + "External id": 982643,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939013455.637, "dur": 4.563, + "args": { + "External id": 982644,"Record function id": 0, "Ev Idx": 5235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939013457.420, "dur": 2.314, + "args": { + "External id": 982645,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939013458.150, "dur": 1.143, + "args": { + "External id": 982646,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939013458.479, "dur": 0.739, + "args": { + "External id": 982647,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939013463.854, "dur": 4.333, + "args": { + "External id": 982648,"Record function id": 0, "Ev Idx": 5239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939013465.266, "dur": 2.416, + "args": { + "External id": 982649,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939013465.800, "dur": 1.381, + "args": { + "External id": 982650,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939013466.293, "dur": 0.782, + "args": { + "External id": 982651,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939013472.300, "dur": 6.756, + "args": { + "External id": 982652,"Record function id": 0, "Ev Idx": 5243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939013473.684, "dur": 4.910, + "args": { + "External id": 982653,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939013474.413, "dur": 3.737, + "args": { + "External id": 982654,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939013477.326, "dur": 0.714, + "args": { + "External id": 982655,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345939013484.260, "dur": 60153.598, + "args": { + "External id": 982656,"Record function id": 0, "Sequence number": 10552249, "Fwd thread id": 1, "Ev Idx": 5247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345939013486.287, "dur": 60140.042, + "args": { + "External id": 982657,"Sequence number": 10552249, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5248 + } + }, + { + "ph": "f", "id": 217, "pid": 2338710, "tid": 2379450, "ts": 6345939013486.287, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.3)", "pid": 2338710, "tid": 2379450, + "ts": 6345939013519.866, "dur": 46.920, + "args": { + "External id": 982658,"Record function id": 0, "Ev Idx": 5249 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.3)", "pid": 2338710, "tid": 2379450, + "ts": 6345939013576.210, "dur": 67.715, + "args": { + "External id": 982659,"Record function id": 0, "Ev Idx": 5250 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.3)", "pid": 2338710, "tid": 2379450, + "ts": 6345939013650.951, "dur": 59965.011, + "args": { + "External id": 982660,"Record function id": 0, "Ev Idx": 5251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345939013755.808, "dur": 7.900, + "args": { + "External id": 982661,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939013777.257, "dur": 5.255, + "args": { + "External id": 982662,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 5253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345939013799.104, "dur": 58723.054, + "args": { + "External id": 982663,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345939013814.935, "dur": 58691.816, + "args": { + "External id": 982664,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345939013919.172, "dur": 24.358, + "args": { + "External id": 982665,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345939013966.995, "dur": 58487.097, + "args": { + "External id": 982666,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 5257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345939013970.190, "dur": 58482.239, + "args": { + "External id": 982667,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 5258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939013975.895, "dur": 11.800, + "args": { + "External id": 982668,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345939013990.382, "dur": 58456.138, + "args": { + "External id": 982669,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 5260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345939072647.462, "dur": 15.269, + "args": { + "External id": 982670,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 5261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345939072652.197, "dur": 9.963, + "args": { + "External id": 982671,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345939072699.086, "dur": 499.589, + "args": { + "External id": 982672,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 5263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345939072739.108, "dur": 451.761, + "args": { + "External id": 982673,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5264, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345939072754.465, "dur": 426.132, + "args": { + "External id": 982674,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 5265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345939073234.772, "dur": 2.871, + "args": { + "External id": 982675,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5266, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939073329.719, "dur": 9.152, + "args": { + "External id": 982676,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939073404.766, "dur": 1.598, + "args": { + "External id": 982677,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939073426.781, "dur": 1.711, + "args": { + "External id": 982678,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939073445.963, "dur": 1.127, + "args": { + "External id": 982679,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939073463.763, "dur": 1.034, + "args": { + "External id": 982680,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939073477.380, "dur": 1.148, + "args": { + "External id": 982681,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939073492.797, "dur": 1.174, + "args": { + "External id": 982682,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939073512.391, "dur": 3.706, + "args": { + "External id": 982683,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939073531.171, "dur": 0.937, + "args": { + "External id": 982684,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345939073656.157, "dur": 3562.085, + "args": { + "External id": 982685,"Record function id": 0, "Ev Idx": 5276 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.2)", "pid": 2338710, "tid": 2379450, + "ts": 6345939073681.174, "dur": 1319.561, + "args": { + "External id": 982686,"Record function id": 0, "Ev Idx": 5277 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.2)", "pid": 2338710, "tid": 2379450, + "ts": 6345939073699.403, "dur": 484.565, + "args": { + "External id": 982687,"Record function id": 0, "Ev Idx": 5278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939073804.708, "dur": 6.092, + "args": { + "External id": 982688,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 5279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939073814.447, "dur": 0.994, + "args": { + "External id": 982689,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 5280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939073817.536, "dur": 1.398, + "args": { + "External id": 982690,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 5281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939073821.284, "dur": 3.274, + "args": { + "External id": 982691,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939073826.260, "dur": 0.994, + "args": { + "External id": 982692,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939073829.144, "dur": 0.835, + "args": { + "External id": 982693,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 5284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939073834.165, "dur": 2.421, + "args": { + "External id": 982694,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 5285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939073838.635, "dur": 0.790, + "args": { + "External id": 982695,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939073841.220, "dur": 0.848, + "args": { + "External id": 982696,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939073843.604, "dur": 0.898, + "args": { + "External id": 982697,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345939073867.702, "dur": 271.425, + "args": { + "External id": 982698,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 5289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345939073888.257, "dur": 242.629, + "args": { + "External id": 982699,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 5290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345939073910.258, "dur": 20.696, + "args": { + "External id": 982700,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345939073936.370, "dur": 110.385, + "args": { + "External id": 982701,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 5292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345939073939.555, "dur": 106.657, + "args": { + "External id": 982702,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 5293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939073945.774, "dur": 10.305, + "args": { + "External id": 982703,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345939073959.192, "dur": 85.554, + "args": { + "External id": 982704,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 5295 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.1", "pid": 2338710, "tid": 2379450, + "ts": 6345939074284.636, "dur": 706.996, + "args": { + "External id": 982705,"Record function id": 0, "Ev Idx": 5296 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.1)", "pid": 2338710, "tid": 2379450, + "ts": 6345939074308.347, "dur": 668.073, + "args": { + "External id": 982706,"Record function id": 0, "Ev Idx": 5297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345939074377.829, "dur": 8.291, + "args": { + "External id": 982707,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345939074406.385, "dur": 50.675, + "args": { + "External id": 982708,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 5299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939074413.770, "dur": 4.963, + "args": { + "External id": 982709,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939074428.627, "dur": 0.398, + "args": { + "External id": 982710,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939074431.206, "dur": 0.666, + "args": { + "External id": 982711,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939074435.297, "dur": 0.853, + "args": { + "External id": 982712,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939074437.527, "dur": 0.445, + "args": { + "External id": 982713,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939074439.573, "dur": 1.958, + "args": { + "External id": 982714,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939074443.366, "dur": 2.733, + "args": { + "External id": 982715,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939074447.557, "dur": 0.626, + "args": { + "External id": 982716,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939074451.313, "dur": 0.416, + "args": { + "External id": 982717,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345939074470.837, "dur": 52.217, + "args": { + "External id": 982718,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 5309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345939074564.967, "dur": 133.245, + "args": { + "External id": 982719,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 5310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345939074577.639, "dur": 4.413, + "args": { + "External id": 982720,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345939074587.890, "dur": 13.941, + "args": { + "External id": 982721,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 5312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345939074594.050, "dur": 7.224, + "args": { + "External id": 982722,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 5313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939074598.167, "dur": 1.215, + "args": { + "External id": 982723,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 5314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345939074610.120, "dur": 28.914, + "args": { + "External id": 982724,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 5315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939074612.620, "dur": 0.397, + "args": { + "External id": 982725,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939074615.048, "dur": 0.673, + "args": { + "External id": 982726,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939074618.834, "dur": 0.455, + "args": { + "External id": 982727,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939074621.199, "dur": 2.368, + "args": { + "External id": 982728,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939074624.897, "dur": 0.629, + "args": { + "External id": 982729,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939074627.538, "dur": 0.322, + "args": { + "External id": 982730,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939074629.339, "dur": 0.531, + "args": { + "External id": 982731,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939074631.481, "dur": 1.471, + "args": { + "External id": 982732,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939074634.760, "dur": 0.331, + "args": { + "External id": 982733,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345939074655.795, "dur": 34.289, + "args": { + "External id": 982734,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 5325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345939074749.605, "dur": 143.590, + "args": { + "External id": 982735,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 5326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345939074785.270, "dur": 104.055, + "args": { + "External id": 982736,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5327, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345939074796.155, "dur": 88.709, + "args": { + "External id": 982737,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 5328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345939074913.674, "dur": 2.071, + "args": { + "External id": 982738,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5329, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345939075034.442, "dur": 2163.041, + "args": { + "External id": 982739,"Sequence number": 10552248, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5330 + } + }, + { + "ph": "f", "id": 218, "pid": 2338710, "tid": 2379450, "ts": 6345939075034.442, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939075207.400, "dur": 135.291, + "args": { + "External id": 982740,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 5331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345939075400.971, "dur": 46.938, + "args": { + "External id": 982741,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 5332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345939075466.943, "dur": 63.034, + "args": { + "External id": 982742,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 5333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939075540.597, "dur": 37.818, + "args": { + "External id": 982743,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939075587.876, "dur": 40.050, + "args": { + "External id": 982744,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939075634.790, "dur": 31.417, + "args": { + "External id": 982745,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939075673.737, "dur": 34.540, + "args": { + "External id": 982746,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345939075736.864, "dur": 29.313, + "args": { + "External id": 982747,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 5338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345939075794.305, "dur": 35.489, + "args": { + "External id": 982748,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345939075860.036, "dur": 22.217, + "args": { + "External id": 982749,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345939075899.107, "dur": 17.774, + "args": { + "External id": 982750,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939075926.938, "dur": 39.727, + "args": { + "External id": 982751,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939075970.572, "dur": 35.300, + "args": { + "External id": 982752,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345939076128.579, "dur": 333.067, + "args": { + "External id": 982753,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 5344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345939076244.609, "dur": 11.466, + "args": { + "External id": 982754,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345939076259.577, "dur": 4.524, + "args": { + "External id": 982755,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345939076265.621, "dur": 3.919, + "args": { + "External id": 982756,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345939076270.815, "dur": 6.159, + "args": { + "External id": 982757,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345939076329.906, "dur": 10.074, + "args": { + "External id": 982758,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939076332.505, "dur": 7.270, + "args": { + "External id": 982759,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345939076342.366, "dur": 37.668, + "args": { + "External id": 982760,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939076349.289, "dur": 1.990, + "args": { + "External id": 982761,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345939076381.721, "dur": 4.940, + "args": { + "External id": 982762,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939076385.605, "dur": 0.904, + "args": { + "External id": 982763,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345939076387.852, "dur": 25.194, + "args": { + "External id": 982764,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939076392.692, "dur": 5.374, + "args": { + "External id": 982765,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345939076508.430, "dur": 31.379, + "args": { + "External id": 982766,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345939076563.213, "dur": 19.908, + "args": { + "External id": 982767,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939076591.799, "dur": 60.822, + "args": { + "External id": 982768,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939076660.747, "dur": 48.564, + "args": { + "External id": 982769,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939076719.630, "dur": 24.990, + "args": { + "External id": 982770,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939076750.647, "dur": 38.958, + "args": { + "External id": 982771,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939076796.508, "dur": 31.139, + "args": { + "External id": 982772,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939076835.755, "dur": 37.616, + "args": { + "External id": 982773,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345939076897.231, "dur": 28.753, + "args": { + "External id": 982774,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 5365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345939076948.580, "dur": 28.655, + "args": { + "External id": 982775,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345939076995.474, "dur": 39.914, + "args": { + "External id": 982776,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345939077101.419, "dur": 21.690, + "args": { + "External id": 982777,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345939077143.902, "dur": 18.110, + "args": { + "External id": 982778,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 5369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939077244.340, "dur": 17.972, + "args": { + "External id": 982779,"Record function id": 0, "Ev Idx": 5370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939077248.450, "dur": 12.587, + "args": { + "External id": 982780,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939077253.676, "dur": 6.250, + "args": { + "External id": 982781,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939077255.402, "dur": 4.393, + "args": { + "External id": 982782,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939077267.093, "dur": 5.702, + "args": { + "External id": 982783,"Record function id": 0, "Ev Idx": 5374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939077268.901, "dur": 3.361, + "args": { + "External id": 982784,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939077269.854, "dur": 1.803, + "args": { + "External id": 982785,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939077270.483, "dur": 1.084, + "args": { + "External id": 982786,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939077276.721, "dur": 5.841, + "args": { + "External id": 982787,"Record function id": 0, "Ev Idx": 5378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939077278.502, "dur": 3.453, + "args": { + "External id": 982788,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939077279.419, "dur": 2.063, + "args": { + "External id": 982789,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939077280.365, "dur": 1.017, + "args": { + "External id": 982790,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939077286.395, "dur": 7.433, + "args": { + "External id": 982791,"Record function id": 0, "Ev Idx": 5382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939077288.224, "dur": 5.078, + "args": { + "External id": 982792,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939077288.955, "dur": 3.896, + "args": { + "External id": 982793,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939077289.510, "dur": 3.270, + "args": { + "External id": 982794,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939077297.747, "dur": 4.699, + "args": { + "External id": 982795,"Record function id": 0, "Ev Idx": 5386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939077299.400, "dur": 2.570, + "args": { + "External id": 982796,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939077300.279, "dur": 1.233, + "args": { + "External id": 982797,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939077300.583, "dur": 0.853, + "args": { + "External id": 982798,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939077306.142, "dur": 7.433, + "args": { + "External id": 982799,"Record function id": 0, "Ev Idx": 5390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939077307.500, "dur": 5.573, + "args": { + "External id": 982800,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939077308.169, "dur": 4.340, + "args": { + "External id": 982801,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939077311.196, "dur": 1.204, + "args": { + "External id": 982802,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939077317.349, "dur": 5.205, + "args": { + "External id": 982803,"Record function id": 0, "Ev Idx": 5394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939077319.065, "dur": 3.005, + "args": { + "External id": 982804,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939077320.055, "dur": 1.557, + "args": { + "External id": 982805,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939077320.566, "dur": 0.966, + "args": { + "External id": 982806,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939077326.176, "dur": 4.273, + "args": { + "External id": 982807,"Record function id": 0, "Ev Idx": 5398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939077327.565, "dur": 2.395, + "args": { + "External id": 982808,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939077328.248, "dur": 1.262, + "args": { + "External id": 982809,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939077328.722, "dur": 0.709, + "args": { + "External id": 982810,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939077333.989, "dur": 4.549, + "args": { + "External id": 982811,"Record function id": 0, "Ev Idx": 5402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939077335.824, "dur": 2.221, + "args": { + "External id": 982812,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939077336.371, "dur": 1.223, + "args": { + "External id": 982813,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939077336.789, "dur": 0.727, + "args": { + "External id": 982814,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345939077343.251, "dur": 61607.659, + "args": { + "External id": 982815,"Record function id": 0, "Sequence number": 10552247, "Fwd thread id": 1, "Ev Idx": 5406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345939077345.372, "dur": 61596.023, + "args": { + "External id": 982816,"Sequence number": 10552247, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5407 + } + }, + { + "ph": "f", "id": 219, "pid": 2338710, "tid": 2379450, "ts": 6345939077345.372, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.2)", "pid": 2338710, "tid": 2379450, + "ts": 6345939077382.404, "dur": 45.350, + "args": { + "External id": 982817,"Record function id": 0, "Ev Idx": 5408 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.2)", "pid": 2338710, "tid": 2379450, + "ts": 6345939077440.371, "dur": 74.859, + "args": { + "External id": 982818,"Record function id": 0, "Ev Idx": 5409 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.2)", "pid": 2338710, "tid": 2379450, + "ts": 6345939077522.922, "dur": 61408.974, + "args": { + "External id": 982819,"Record function id": 0, "Ev Idx": 5410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345939077625.483, "dur": 7.887, + "args": { + "External id": 982820,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939077644.667, "dur": 5.569, + "args": { + "External id": 982821,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 5412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345939077666.768, "dur": 60247.507, + "args": { + "External id": 982822,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345939077685.153, "dur": 60211.854, + "args": { + "External id": 982823,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345939077787.614, "dur": 23.518, + "args": { + "External id": 982824,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345939077836.137, "dur": 60007.959, + "args": { + "External id": 982825,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 5416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345939077842.834, "dur": 59999.966, + "args": { + "External id": 982826,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 5417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939077848.131, "dur": 10.620, + "args": { + "External id": 982827,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345939077861.051, "dur": 59975.126, + "args": { + "External id": 982828,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 5419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345939138085.608, "dur": 18.356, + "args": { + "External id": 982829,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 5420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345939138091.475, "dur": 11.703, + "args": { + "External id": 982830,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345939138141.303, "dur": 435.734, + "args": { + "External id": 982831,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 5422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345939138182.789, "dur": 387.715, + "args": { + "External id": 982832,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5423, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345939138199.695, "dur": 363.714, + "args": { + "External id": 982833,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 5424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345939138607.725, "dur": 2.489, + "args": { + "External id": 982834,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5425, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939138683.342, "dur": 9.070, + "args": { + "External id": 982835,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939138746.584, "dur": 1.833, + "args": { + "External id": 982836,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939138766.541, "dur": 1.481, + "args": { + "External id": 982837,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939138785.516, "dur": 1.120, + "args": { + "External id": 982838,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939138800.876, "dur": 0.969, + "args": { + "External id": 982839,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939138814.950, "dur": 0.943, + "args": { + "External id": 982840,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939138826.996, "dur": 1.047, + "args": { + "External id": 982841,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939138843.683, "dur": 2.477, + "args": { + "External id": 982842,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939138860.918, "dur": 0.978, + "args": { + "External id": 982843,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345939138966.976, "dur": 3471.330, + "args": { + "External id": 982844,"Record function id": 0, "Ev Idx": 5435 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.1)", "pid": 2338710, "tid": 2379450, + "ts": 6345939138989.994, "dur": 1340.024, + "args": { + "External id": 982845,"Record function id": 0, "Ev Idx": 5436 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.1)", "pid": 2338710, "tid": 2379450, + "ts": 6345939139006.930, "dur": 479.309, + "args": { + "External id": 982846,"Record function id": 0, "Ev Idx": 5437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939139175.236, "dur": 7.240, + "args": { + "External id": 982847,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 5438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939139186.478, "dur": 1.107, + "args": { + "External id": 982848,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 5439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939139190.078, "dur": 1.169, + "args": { + "External id": 982849,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 5440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939139193.291, "dur": 3.351, + "args": { + "External id": 982850,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939139198.563, "dur": 1.273, + "args": { + "External id": 982851,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939139204.065, "dur": 1.135, + "args": { + "External id": 982852,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 5443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939139207.414, "dur": 2.259, + "args": { + "External id": 982853,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 5444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939139211.282, "dur": 1.009, + "args": { + "External id": 982854,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939139214.129, "dur": 0.902, + "args": { + "External id": 982855,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939139219.025, "dur": 0.785, + "args": { + "External id": 982856,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345939139240.569, "dur": 206.736, + "args": { + "External id": 982857,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 5448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345939139272.432, "dur": 168.588, + "args": { + "External id": 982858,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 5449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345939139294.677, "dur": 19.794, + "args": { + "External id": 982859,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345939139320.202, "dur": 84.661, + "args": { + "External id": 982860,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 5451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345939139323.755, "dur": 80.705, + "args": { + "External id": 982861,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 5452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939139328.756, "dur": 10.464, + "args": { + "External id": 982862,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345939139341.480, "dur": 62.230, + "args": { + "External id": 982863,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 5454 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.0", "pid": 2338710, "tid": 2379450, + "ts": 6345939139581.515, "dur": 739.211, + "args": { + "External id": 982864,"Record function id": 0, "Ev Idx": 5455 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.0)", "pid": 2338710, "tid": 2379450, + "ts": 6345939139600.675, "dur": 704.968, + "args": { + "External id": 982865,"Record function id": 0, "Ev Idx": 5456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345939139669.490, "dur": 5.641, + "args": { + "External id": 982866,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345939139693.069, "dur": 38.501, + "args": { + "External id": 982867,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 5458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939139699.346, "dur": 2.085, + "args": { + "External id": 982868,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939139704.133, "dur": 1.930, + "args": { + "External id": 982869,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939139707.821, "dur": 0.633, + "args": { + "External id": 982870,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939139710.285, "dur": 0.605, + "args": { + "External id": 982871,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939139714.452, "dur": 0.433, + "args": { + "External id": 982872,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939139716.599, "dur": 0.304, + "args": { + "External id": 982873,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939139718.253, "dur": 3.051, + "args": { + "External id": 982874,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939139724.159, "dur": 0.421, + "args": { + "External id": 982875,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939139726.194, "dur": 0.642, + "args": { + "External id": 982876,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345939139742.546, "dur": 46.909, + "args": { + "External id": 982877,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 5468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345939139824.993, "dur": 128.887, + "args": { + "External id": 982878,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 5469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345939139836.423, "dur": 4.003, + "args": { + "External id": 982879,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345939139846.310, "dur": 11.936, + "args": { + "External id": 982880,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 5471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345939139851.337, "dur": 6.435, + "args": { + "External id": 982881,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 5472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939139855.556, "dur": 0.782, + "args": { + "External id": 982882,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 5473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345939139866.815, "dur": 32.095, + "args": { + "External id": 982883,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 5474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939139869.666, "dur": 0.602, + "args": { + "External id": 982884,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939139873.807, "dur": 0.636, + "args": { + "External id": 982885,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939139876.167, "dur": 0.545, + "args": { + "External id": 982886,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939139878.634, "dur": 4.080, + "args": { + "External id": 982887,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939139884.679, "dur": 0.568, + "args": { + "External id": 982888,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939139886.425, "dur": 0.755, + "args": { + "External id": 982889,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939139890.453, "dur": 0.721, + "args": { + "External id": 982890,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939139892.855, "dur": 0.525, + "args": { + "External id": 982891,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939139894.729, "dur": 0.638, + "args": { + "External id": 982892,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345939139910.961, "dur": 34.422, + "args": { + "External id": 982893,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 5484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345939140003.231, "dur": 210.723, + "args": { + "External id": 982894,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 5485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345939140095.703, "dur": 113.772, + "args": { + "External id": 982895,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5486, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345939140110.165, "dur": 94.411, + "args": { + "External id": 982896,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 5487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345939140238.974, "dur": 2.262, + "args": { + "External id": 982897,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5488, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345939140339.431, "dur": 2076.433, + "args": { + "External id": 982898,"Sequence number": 10552246, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5489 + } + }, + { + "ph": "f", "id": 220, "pid": 2338710, "tid": 2379450, "ts": 6345939140339.431, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939140467.058, "dur": 131.480, + "args": { + "External id": 982899,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 5490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345939140651.465, "dur": 48.535, + "args": { + "External id": 982900,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 5491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345939140717.877, "dur": 62.144, + "args": { + "External id": 982901,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 5492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939140790.620, "dur": 34.830, + "args": { + "External id": 982902,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939140835.218, "dur": 36.624, + "args": { + "External id": 982903,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939140878.896, "dur": 30.336, + "args": { + "External id": 982904,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939140917.373, "dur": 31.611, + "args": { + "External id": 982905,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345939140978.481, "dur": 27.612, + "args": { + "External id": 982906,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 5497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345939141092.497, "dur": 37.212, + "args": { + "External id": 982907,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345939141163.948, "dur": 22.037, + "args": { + "External id": 982908,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345939141203.830, "dur": 16.497, + "args": { + "External id": 982909,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939141229.642, "dur": 47.436, + "args": { + "External id": 982910,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939141281.231, "dur": 38.105, + "args": { + "External id": 982911,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345939141369.025, "dur": 324.114, + "args": { + "External id": 982912,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 5503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345939141463.180, "dur": 7.894, + "args": { + "External id": 982913,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345939141473.299, "dur": 3.342, + "args": { + "External id": 982914,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345939141478.744, "dur": 2.458, + "args": { + "External id": 982915,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345939141482.519, "dur": 3.046, + "args": { + "External id": 982916,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345939141557.801, "dur": 7.945, + "args": { + "External id": 982917,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939141560.260, "dur": 4.845, + "args": { + "External id": 982918,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345939141567.983, "dur": 38.614, + "args": { + "External id": 982919,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939141574.831, "dur": 2.124, + "args": { + "External id": 982920,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345939141610.887, "dur": 2.309, + "args": { + "External id": 982921,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939141612.520, "dur": 0.558, + "args": { + "External id": 982922,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345939141614.375, "dur": 21.346, + "args": { + "External id": 982923,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939141618.239, "dur": 2.652, + "args": { + "External id": 982924,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345939141741.130, "dur": 33.894, + "args": { + "External id": 982925,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345939141795.459, "dur": 19.882, + "args": { + "External id": 982926,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939141822.986, "dur": 47.567, + "args": { + "External id": 982927,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939141877.437, "dur": 43.212, + "args": { + "External id": 982928,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939141931.037, "dur": 23.691, + "args": { + "External id": 982929,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939141960.824, "dur": 35.004, + "args": { + "External id": 982930,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939142004.096, "dur": 92.403, + "args": { + "External id": 982931,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939142108.647, "dur": 39.944, + "args": { + "External id": 982932,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345939142176.610, "dur": 30.963, + "args": { + "External id": 982933,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 5524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345939142229.214, "dur": 28.964, + "args": { + "External id": 982934,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345939142278.021, "dur": 21.104, + "args": { + "External id": 982935,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345939142318.117, "dur": 20.662, + "args": { + "External id": 982936,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345939142355.740, "dur": 20.228, + "args": { + "External id": 982937,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 5528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939142464.688, "dur": 18.897, + "args": { + "External id": 982938,"Record function id": 0, "Ev Idx": 5529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939142468.448, "dur": 13.946, + "args": { + "External id": 982939,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939142473.429, "dur": 7.619, + "args": { + "External id": 982940,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939142475.081, "dur": 5.832, + "args": { + "External id": 982941,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939142488.151, "dur": 6.230, + "args": { + "External id": 982942,"Record function id": 0, "Ev Idx": 5533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939142490.111, "dur": 3.756, + "args": { + "External id": 982943,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939142490.895, "dur": 2.453, + "args": { + "External id": 982944,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939142491.879, "dur": 1.333, + "args": { + "External id": 982945,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939142498.410, "dur": 4.448, + "args": { + "External id": 982946,"Record function id": 0, "Ev Idx": 5537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939142499.786, "dur": 2.592, + "args": { + "External id": 982947,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939142500.352, "dur": 1.593, + "args": { + "External id": 982948,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939142500.944, "dur": 0.879, + "args": { + "External id": 982949,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939142506.735, "dur": 4.135, + "args": { + "External id": 982950,"Record function id": 0, "Ev Idx": 5541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939142508.125, "dur": 2.267, + "args": { + "External id": 982951,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939142508.798, "dur": 1.131, + "args": { + "External id": 982952,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939142509.166, "dur": 0.631, + "args": { + "External id": 982953,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939142514.501, "dur": 6.891, + "args": { + "External id": 982954,"Record function id": 0, "Ev Idx": 5545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939142515.958, "dur": 4.925, + "args": { + "External id": 982955,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939142516.520, "dur": 3.935, + "args": { + "External id": 982956,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939142516.863, "dur": 3.465, + "args": { + "External id": 982957,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939142525.142, "dur": 6.241, + "args": { + "External id": 982958,"Record function id": 0, "Ev Idx": 5549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939142526.407, "dur": 4.491, + "args": { + "External id": 982959,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939142526.952, "dur": 3.495, + "args": { + "External id": 982960,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939142529.488, "dur": 0.833, + "args": { + "External id": 982961,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939142535.399, "dur": 5.052, + "args": { + "External id": 982962,"Record function id": 0, "Ev Idx": 5553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939142536.991, "dur": 2.984, + "args": { + "External id": 982963,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939142537.566, "dur": 1.943, + "args": { + "External id": 982964,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939142538.657, "dur": 0.763, + "args": { + "External id": 982965,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939142544.085, "dur": 4.106, + "args": { + "External id": 982966,"Record function id": 0, "Ev Idx": 5557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939142545.377, "dur": 2.304, + "args": { + "External id": 982967,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939142546.061, "dur": 1.185, + "args": { + "External id": 982968,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939142546.532, "dur": 0.588, + "args": { + "External id": 982969,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939142552.049, "dur": 4.501, + "args": { + "External id": 982970,"Record function id": 0, "Ev Idx": 5561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939142553.482, "dur": 2.585, + "args": { + "External id": 982971,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939142554.071, "dur": 1.505, + "args": { + "External id": 982972,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939142554.515, "dur": 0.934, + "args": { + "External id": 982973,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345939142562.215, "dur": 62762.336, + "args": { + "External id": 982974,"Record function id": 0, "Sequence number": 10552245, "Fwd thread id": 1, "Ev Idx": 5565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345939142564.560, "dur": 62748.106, + "args": { + "External id": 982975,"Sequence number": 10552245, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5566 + } + }, + { + "ph": "f", "id": 221, "pid": 2338710, "tid": 2379450, "ts": 6345939142564.560, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.1)", "pid": 2338710, "tid": 2379450, + "ts": 6345939142600.580, "dur": 48.595, + "args": { + "External id": 982976,"Record function id": 0, "Ev Idx": 5567 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.1)", "pid": 2338710, "tid": 2379450, + "ts": 6345939142658.609, "dur": 80.329, + "args": { + "External id": 982977,"Record function id": 0, "Ev Idx": 5568 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.1)", "pid": 2338710, "tid": 2379450, + "ts": 6345939142746.104, "dur": 62555.476, + "args": { + "External id": 982978,"Record function id": 0, "Ev Idx": 5569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345939142851.568, "dur": 8.034, + "args": { + "External id": 982979,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939142870.510, "dur": 5.332, + "args": { + "External id": 982980,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 5571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345939142891.698, "dur": 61280.431, + "args": { + "External id": 982981,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345939142907.819, "dur": 61247.366, + "args": { + "External id": 982982,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345939143038.445, "dur": 67.681, + "args": { + "External id": 982983,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345939143137.862, "dur": 60954.561, + "args": { + "External id": 982984,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 5575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345939143145.182, "dur": 60945.763, + "args": { + "External id": 982985,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 5576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939143151.368, "dur": 15.089, + "args": { + "External id": 982986,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345939143169.949, "dur": 60913.894, + "args": { + "External id": 982987,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 5578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345939204311.494, "dur": 17.188, + "args": { + "External id": 982988,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 5579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345939204316.870, "dur": 10.963, + "args": { + "External id": 982989,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345939204370.802, "dur": 489.173, + "args": { + "External id": 982990,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 5581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345939204415.909, "dur": 436.999, + "args": { + "External id": 982991,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5582, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345939204433.183, "dur": 411.495, + "args": { + "External id": 982992,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 5583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345939204890.022, "dur": 2.650, + "args": { + "External id": 982993,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5584, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939204970.057, "dur": 8.513, + "args": { + "External id": 982994,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939205103.185, "dur": 4.270, + "args": { + "External id": 982995,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939205128.665, "dur": 1.671, + "args": { + "External id": 982996,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939205145.144, "dur": 1.067, + "args": { + "External id": 982997,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939205160.222, "dur": 1.000, + "args": { + "External id": 982998,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939205176.648, "dur": 0.846, + "args": { + "External id": 982999,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939205191.257, "dur": 0.885, + "args": { + "External id": 983000,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939205205.342, "dur": 2.440, + "args": { + "External id": 983001,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939205220.425, "dur": 0.856, + "args": { + "External id": 983002,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345939205346.127, "dur": 2768.596, + "args": { + "External id": 983003,"Record function id": 0, "Ev Idx": 5594 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.0)", "pid": 2338710, "tid": 2379450, + "ts": 6345939205372.142, "dur": 537.758, + "args": { + "External id": 983004,"Record function id": 0, "Ev Idx": 5595 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.0)", "pid": 2338710, "tid": 2379450, + "ts": 6345939205392.009, "dur": 413.830, + "args": { + "External id": 983005,"Record function id": 0, "Ev Idx": 5596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939205501.727, "dur": 6.147, + "args": { + "External id": 983006,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 5597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939205511.814, "dur": 1.237, + "args": { + "External id": 983007,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 5598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939205517.524, "dur": 1.237, + "args": { + "External id": 983008,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 5599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939205520.591, "dur": 1.070, + "args": { + "External id": 983009,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939205523.439, "dur": 0.980, + "args": { + "External id": 983010,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939205526.098, "dur": 1.013, + "args": { + "External id": 983011,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 5602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939205531.277, "dur": 2.338, + "args": { + "External id": 983012,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 5603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939205535.553, "dur": 3.225, + "args": { + "External id": 983013,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939205540.637, "dur": 0.912, + "args": { + "External id": 983014,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939205543.309, "dur": 1.123, + "args": { + "External id": 983015,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345939205568.012, "dur": 199.952, + "args": { + "External id": 983016,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 5607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345939205589.869, "dur": 171.490, + "args": { + "External id": 983017,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 5608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345939205613.353, "dur": 17.634, + "args": { + "External id": 983018,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345939205636.502, "dur": 87.689, + "args": { + "External id": 983019,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 5610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345939205639.942, "dur": 83.685, + "args": { + "External id": 983020,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 5611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939205645.481, "dur": 8.894, + "args": { + "External id": 983021,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345939205657.583, "dur": 64.916, + "args": { + "External id": 983022,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 5613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345939205917.611, "dur": 2128.300, + "args": { + "External id": 983023,"Sequence number": 10552244, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5614 + } + }, + { + "ph": "f", "id": 222, "pid": 2338710, "tid": 2379450, "ts": 6345939205917.611, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939206112.188, "dur": 133.505, + "args": { + "External id": 983024,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 5615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345939206301.578, "dur": 47.443, + "args": { + "External id": 983025,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 5616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345939206367.792, "dur": 61.982, + "args": { + "External id": 983026,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 5617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939206440.309, "dur": 39.832, + "args": { + "External id": 983027,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939206491.640, "dur": 36.633, + "args": { + "External id": 983028,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939206535.590, "dur": 29.745, + "args": { + "External id": 983029,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939206571.818, "dur": 34.214, + "args": { + "External id": 983030,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345939206636.070, "dur": 29.612, + "args": { + "External id": 983031,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 5622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345939206686.921, "dur": 32.730, + "args": { + "External id": 983032,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345939206752.960, "dur": 24.477, + "args": { + "External id": 983033,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345939206794.113, "dur": 18.519, + "args": { + "External id": 983034,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939206820.544, "dur": 44.243, + "args": { + "External id": 983035,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939206868.467, "dur": 37.944, + "args": { + "External id": 983036,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345939206942.843, "dur": 395.817, + "args": { + "External id": 983037,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 5628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345939207092.771, "dur": 10.908, + "args": { + "External id": 983038,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345939207110.739, "dur": 3.540, + "args": { + "External id": 983039,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345939207116.185, "dur": 3.149, + "args": { + "External id": 983040,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345939207154.067, "dur": 5.350, + "args": { + "External id": 983041,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345939207214.295, "dur": 7.633, + "args": { + "External id": 983042,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939207218.210, "dur": 3.501, + "args": { + "External id": 983043,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345939207224.286, "dur": 34.464, + "args": { + "External id": 983044,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939207230.670, "dur": 1.842, + "args": { + "External id": 983045,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345939207262.683, "dur": 1.821, + "args": { + "External id": 983046,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939207263.703, "dur": 0.682, + "args": { + "External id": 983047,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345939207265.743, "dur": 17.759, + "args": { + "External id": 983048,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939207269.788, "dur": 0.620, + "args": { + "External id": 983049,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345939207384.899, "dur": 35.154, + "args": { + "External id": 983050,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345939207441.164, "dur": 20.710, + "args": { + "External id": 983051,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939207474.103, "dur": 59.469, + "args": { + "External id": 983052,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939207544.387, "dur": 48.580, + "args": { + "External id": 983053,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939207602.217, "dur": 24.117, + "args": { + "External id": 983054,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939207632.545, "dur": 33.866, + "args": { + "External id": 983055,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939207673.272, "dur": 29.564, + "args": { + "External id": 983056,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345939207713.932, "dur": 32.496, + "args": { + "External id": 983057,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345939207772.227, "dur": 24.926, + "args": { + "External id": 983058,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 5649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345939207832.707, "dur": 36.337, + "args": { + "External id": 983059,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345939207890.740, "dur": 20.288, + "args": { + "External id": 983060,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345939207932.828, "dur": 19.972, + "args": { + "External id": 983061,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345939207967.337, "dur": 17.664, + "args": { + "External id": 983062,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 5653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939208143.314, "dur": 18.179, + "args": { + "External id": 983063,"Record function id": 0, "Ev Idx": 5654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939208147.652, "dur": 12.567, + "args": { + "External id": 983064,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939208153.057, "dur": 6.248, + "args": { + "External id": 983065,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939208154.585, "dur": 4.622, + "args": { + "External id": 983066,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939208165.934, "dur": 11.703, + "args": { + "External id": 983067,"Record function id": 0, "Ev Idx": 5658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939208167.827, "dur": 9.276, + "args": { + "External id": 983068,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939208168.777, "dur": 7.629, + "args": { + "External id": 983069,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939208169.276, "dur": 7.027, + "args": { + "External id": 983070,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939208181.484, "dur": 5.269, + "args": { + "External id": 983071,"Record function id": 0, "Ev Idx": 5662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939208183.366, "dur": 2.875, + "args": { + "External id": 983072,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939208184.117, "dur": 1.631, + "args": { + "External id": 983073,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939208184.838, "dur": 0.818, + "args": { + "External id": 983074,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939208190.514, "dur": 4.313, + "args": { + "External id": 983075,"Record function id": 0, "Ev Idx": 5666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939208192.100, "dur": 2.240, + "args": { + "External id": 983076,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939208192.732, "dur": 1.131, + "args": { + "External id": 983077,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939208193.066, "dur": 0.721, + "args": { + "External id": 983078,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939208198.522, "dur": 4.951, + "args": { + "External id": 983079,"Record function id": 0, "Ev Idx": 5670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939208200.160, "dur": 2.828, + "args": { + "External id": 983080,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939208200.970, "dur": 1.515, + "args": { + "External id": 983081,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939208201.507, "dur": 0.904, + "args": { + "External id": 983082,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939208207.234, "dur": 6.509, + "args": { + "External id": 983083,"Record function id": 0, "Ev Idx": 5674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939208208.823, "dur": 4.423, + "args": { + "External id": 983084,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939208209.582, "dur": 3.128, + "args": { + "External id": 983085,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939208211.895, "dur": 0.705, + "args": { + "External id": 983086,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939208218.353, "dur": 4.453, + "args": { + "External id": 983087,"Record function id": 0, "Ev Idx": 5678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939208219.822, "dur": 2.468, + "args": { + "External id": 983088,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939208220.636, "dur": 1.164, + "args": { + "External id": 983089,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939208221.003, "dur": 0.712, + "args": { + "External id": 983090,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939208226.764, "dur": 5.031, + "args": { + "External id": 983091,"Record function id": 0, "Ev Idx": 5682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939208228.545, "dur": 2.734, + "args": { + "External id": 983092,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939208229.373, "dur": 1.446, + "args": { + "External id": 983093,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939208229.728, "dur": 1.014, + "args": { + "External id": 983094,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939208235.730, "dur": 7.333, + "args": { + "External id": 983095,"Record function id": 0, "Ev Idx": 5686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939208237.496, "dur": 5.086, + "args": { + "External id": 983096,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939208238.539, "dur": 3.578, + "args": { + "External id": 983097,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939208239.248, "dur": 2.796, + "args": { + "External id": 983098,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345939208248.213, "dur": 65056.255, + "args": { + "External id": 983099,"Record function id": 0, "Sequence number": 10552243, "Fwd thread id": 1, "Ev Idx": 5690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345939208249.943, "dur": 65044.658, + "args": { + "External id": 983100,"Sequence number": 10552243, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5691 + } + }, + { + "ph": "f", "id": 223, "pid": 2338710, "tid": 2379450, "ts": 6345939208249.943, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.0)", "pid": 2338710, "tid": 2379450, + "ts": 6345939208289.331, "dur": 45.966, + "args": { + "External id": 983101,"Record function id": 0, "Ev Idx": 5692 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.0)", "pid": 2338710, "tid": 2379450, + "ts": 6345939208344.869, "dur": 84.066, + "args": { + "External id": 983102,"Record function id": 0, "Ev Idx": 5693 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.0)", "pid": 2338710, "tid": 2379450, + "ts": 6345939208436.596, "dur": 64846.926, + "args": { + "External id": 983103,"Record function id": 0, "Ev Idx": 5694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345939208549.700, "dur": 9.282, + "args": { + "External id": 983104,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939208571.339, "dur": 5.339, + "args": { + "External id": 983105,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 5696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345939208593.675, "dur": 63665.951, + "args": { + "External id": 983106,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345939208610.405, "dur": 63631.906, + "args": { + "External id": 983107,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345939208715.801, "dur": 22.280, + "args": { + "External id": 983108,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345939208761.069, "dur": 63424.231, + "args": { + "External id": 983109,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 5700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345939208769.190, "dur": 63415.018, + "args": { + "External id": 983110,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 5701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939208774.717, "dur": 11.146, + "args": { + "External id": 983111,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345939208788.544, "dur": 63389.054, + "args": { + "External id": 983112,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 5703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345939272387.625, "dur": 13.176, + "args": { + "External id": 983113,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 5704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345939272391.705, "dur": 8.656, + "args": { + "External id": 983114,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345939272436.473, "dur": 424.753, + "args": { + "External id": 983115,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 5706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345939272473.519, "dur": 382.135, + "args": { + "External id": 983116,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5707, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345939272489.234, "dur": 360.151, + "args": { + "External id": 983117,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 5708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345939272889.806, "dur": 2.305, + "args": { + "External id": 983118,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5709, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939272961.391, "dur": 7.653, + "args": { + "External id": 983119,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939273050.375, "dur": 42.407, + "args": { + "External id": 983120,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939273118.790, "dur": 2.792, + "args": { + "External id": 983121,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939273135.490, "dur": 1.119, + "args": { + "External id": 983122,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939273152.583, "dur": 0.984, + "args": { + "External id": 983123,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939273166.734, "dur": 3.058, + "args": { + "External id": 983124,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939273181.213, "dur": 1.269, + "args": { + "External id": 983125,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939273195.747, "dur": 2.813, + "args": { + "External id": 983126,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939273210.554, "dur": 0.843, + "args": { + "External id": 983127,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345939273323.106, "dur": 324.601, + "args": { + "External id": 983128,"Record function id": 0, "Sequence number": 10552242, "Fwd thread id": 1, "Ev Idx": 5719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345939273326.369, "dur": 311.946, + "args": { + "External id": 983129,"Sequence number": 10552242, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5720 + } + }, + { + "ph": "f", "id": 224, "pid": 2338710, "tid": 2379450, "ts": 6345939273326.369, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_0", "pid": 2338710, "tid": 2379450, + "ts": 6345939273462.094, "dur": 60.247, + "args": { + "External id": 983130,"kernel_hash": "ci46ycwqu4mevlefaywvsre33v5ki6y5q5evkfmt4t2fie5todnp", "grid": "grid(131072000,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "131072000"], "kernel_file": "/tmp/torchinductor_cvm/i4/ci46ycwqu4mevlefaywvsre33v5ki6y5q5evkfmt4t2fie5todnp.py", "kernel_backend": "triton", "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 5721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_1", "pid": 2338710, "tid": 2379450, + "ts": 6345939273542.284, "dur": 31.359, + "args": { + "External id": 983131,"kernel_hash": "c4hfnruzhc7gtb7rldzmrokxpon7tcgieufpcwt6pxhd3syqv6zy", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/4h/c4hfnruzhc7gtb7rldzmrokxpon7tcgieufpcwt6pxhd3syqv6zy.py", "kernel_backend": "triton", "Input type": ["long int", "c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096], [8, 4096, 4096], [32000, 4096], []], "Ev Idx": 5722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_2", "pid": 2338710, "tid": 2379450, + "ts": 6345939273596.522, "dur": 24.220, + "args": { + "External id": 983132,"kernel_hash": "cagfbb4snc4nnt5qtlfe5j5npoahg3qjke5pbuljm6ouabijdwft", "grid": "grid(131072000,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "131072000"], "kernel_file": "/tmp/torchinductor_cvm/ag/cagfbb4snc4nnt5qtlfe5j5npoahg3qjke5pbuljm6ouabijdwft.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 5723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939273658.788, "dur": 15.849, + "args": { + "External id": 983133,"Record function id": 0, "Ev Idx": 5724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345939273661.900, "dur": 11.666, + "args": { + "External id": 983134,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 5725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939273666.222, "dur": 6.325, + "args": { + "External id": 983135,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 5726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345939273668.031, "dur": 4.393, + "args": { + "External id": 983136,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 5727 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::root_post_backward_callback", "pid": 2338710, "tid": 2379450, + "ts": 6345939273698.965, "dur": 17101.876, + "args": { + "External id": 983137,"Record function id": 0, "Ev Idx": 5728 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate", "pid": 2338710, "tid": 2379450, + "ts": 6345939273719.861, "dur": 39.926, + "args": { + "External id": 983138,"Record function id": 0, "Ev Idx": 5729 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard", "pid": 2338710, "tid": 2379450, + "ts": 6345939273766.692, "dur": 221.670, + "args": { + "External id": 983139,"Record function id": 0, "Ev Idx": 5730 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce", "pid": 2338710, "tid": 2379450, + "ts": 6345939273995.043, "dur": 16512.211, + "args": { + "External id": 983140,"Record function id": 0, "Ev Idx": 5731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345939274200.395, "dur": 10.981, + "args": { + "External id": 983141,"Record function id": 0, "Concrete Inputs": ["[1134596096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345939274225.904, "dur": 6.251, + "args": { + "External id": 983142,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1134596096], []], "Ev Idx": 5733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345939274257.586, "dur": 14654.875, + "args": { + "External id": 983143,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[], [], [], [141824512, 1]], "Input Dims": [[], [], [], [8, 141824512]], "Ev Idx": 5734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345939274280.359, "dur": 14613.979, + "args": { + "External id": 983144,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[], [], [], [141824512, 1]], "Input Dims": [[], [], [], [8, 141824512]], "Ev Idx": 5735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345939275156.822, "dur": 29.421, + "args": { + "External id": 983145,"Record function id": 0, "Concrete Inputs": ["[277237]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345939275504.568, "dur": 13326.185, + "args": { + "External id": 983146,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[277237], [], [], [], [], [], [], []], "Ev Idx": 5737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345939275510.135, "dur": 13319.108, + "args": { + "External id": 983147,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[277237], [], [], [], [], [], []], "Ev Idx": 5738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939275517.781, "dur": 17.327, + "args": { + "External id": 983148,"Record function id": 0, "Concrete Inputs": ["[277237]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345939275538.119, "dur": 13282.190, + "args": { + "External id": 983149,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[277237], [277237], []], "Ev Idx": 5740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345939289156.231, "dur": 16.231, + "args": { + "External id": 983150,"Record function id": 0, "Concrete Inputs": ["", "[141824512]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[1134596096], [], [], [], [], []], "Ev Idx": 5741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345939289161.317, "dur": 10.482, + "args": { + "External id": 983151,"Record function id": 0, "Concrete Inputs": ["[141824512]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345939289214.128, "dur": 440.371, + "args": { + "External id": 983152,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[141824512], [1134596096], [], [], [], []], "Ev Idx": 5743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345939289254.443, "dur": 393.115, + "args": { + "External id": 983153,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 141824512, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1134596096], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5744, "In msg nelems": 1134596096 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345939289271.818, "dur": 368.383, + "args": { + "External id": 983154,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1134596096]], "Ev Idx": 5745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345939289682.656, "dur": 2.298, + "args": { + "External id": 983155,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5746, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939289749.218, "dur": 8.387, + "args": { + "External id": 983156,"Record function id": 0, "Concrete Inputs": ["", "[4000, 4096]", "[4096, 1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939289814.779, "dur": 1.342, + "args": { + "External id": 983157,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "16384000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939289834.351, "dur": 1.577, + "args": { + "External id": 983158,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "16384512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939289851.576, "dur": 1.067, + "args": { + "External id": 983159,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "18481664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939289867.076, "dur": 0.879, + "args": { + "External id": 983160,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "19005952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939289883.575, "dur": 1.199, + "args": { + "External id": 983161,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "19530240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939289897.382, "dur": 1.365, + "args": { + "External id": 983162,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "21627392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939289910.911, "dur": 0.673, + "args": { + "External id": 983163,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "21627904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939289923.153, "dur": 1.339, + "args": { + "External id": 983164,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "28967936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939289938.016, "dur": 1.144, + "args": { + "External id": 983165,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "36307968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939289952.496, "dur": 1.339, + "args": { + "External id": 983166,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "43648000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939289968.493, "dur": 1.287, + "args": { + "External id": 983167,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "43648512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939289982.302, "dur": 1.038, + "args": { + "External id": 983168,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "45745664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939289996.058, "dur": 1.059, + "args": { + "External id": 983169,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "46269952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939290028.628, "dur": 3.232, + "args": { + "External id": 983170,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "46794240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939290049.862, "dur": 0.890, + "args": { + "External id": 983171,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "48891392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939290109.654, "dur": 2.682, + "args": { + "External id": 983172,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "48891904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939290125.504, "dur": 0.992, + "args": { + "External id": 983173,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "56231936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939290139.085, "dur": 0.827, + "args": { + "External id": 983174,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "63571968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939290150.821, "dur": 1.638, + "args": { + "External id": 983175,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "70912000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939290164.317, "dur": 3.088, + "args": { + "External id": 983176,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "70912512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939290183.187, "dur": 1.116, + "args": { + "External id": 983177,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "73009664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939290196.105, "dur": 1.074, + "args": { + "External id": 983178,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "73533952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939290219.188, "dur": 1.217, + "args": { + "External id": 983179,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "74058240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939290233.619, "dur": 0.925, + "args": { + "External id": 983180,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "76155392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939290249.110, "dur": 1.150, + "args": { + "External id": 983181,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "76155904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939290263.381, "dur": 0.952, + "args": { + "External id": 983182,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "83495936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939290276.440, "dur": 1.079, + "args": { + "External id": 983183,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "90835968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939290287.872, "dur": 2.608, + "args": { + "External id": 983184,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "98176000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939290303.011, "dur": 1.184, + "args": { + "External id": 983185,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "98176512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939290315.496, "dur": 1.161, + "args": { + "External id": 983186,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "100273664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939290331.601, "dur": 0.881, + "args": { + "External id": 983187,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "100797952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939290344.822, "dur": 1.145, + "args": { + "External id": 983188,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "101322240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939290360.046, "dur": 1.402, + "args": { + "External id": 983189,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "103419392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939290371.691, "dur": 3.031, + "args": { + "External id": 983190,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "103419904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939290385.230, "dur": 1.193, + "args": { + "External id": 983191,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "110759936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939290398.499, "dur": 1.241, + "args": { + "External id": 983192,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "118099968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939290412.698, "dur": 1.018, + "args": { + "External id": 983193,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "125440000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345939290425.105, "dur": 0.962, + "args": { + "External id": 983194,"Record function id": 0, "Concrete Inputs": ["", "[4000, 4096]", "[4096, 1]", "125440512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: DivBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940434638.194, "dur": 122.443, + "args": { + "External id": 983195,"Record function id": 0, "Sequence number": 10552697, "Fwd thread id": 1, "Ev Idx": 5786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "DivBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940434647.719, "dur": 102.056, + "args": { + "External id": 983196,"Sequence number": 10552697, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 5787 + } + }, + { + "ph": "f", "id": 225, "pid": 2338710, "tid": 2379450, "ts": 6345940434647.719, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2338710, "tid": 2379450, + "ts": 6345940434657.371, "dur": 90.818, + "args": { + "External id": 983197,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 5788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940434771.078, "dur": 278.400, + "args": { + "External id": 983198,"Record function id": 0, "Ev Idx": 5789 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345940434849.956, "dur": 97.133, + "args": { + "External id": 983199,"Record function id": 0, "Ev Idx": 5790 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.25", "pid": 2338710, "tid": 2379450, + "ts": 6345940434884.157, "dur": 48.655, + "args": { + "External id": 983200,"Record function id": 0, "Ev Idx": 5791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940434953.101, "dur": 2.582, + "args": { + "External id": 983201,"Sequence number": 10552696, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 5792 + } + }, + { + "ph": "f", "id": 226, "pid": 2338710, "tid": 2379450, "ts": 6345940434953.101, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345940434960.225, "dur": 82.514, + "args": { + "External id": 983202,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 5793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345940434966.156, "dur": 75.528, + "args": { + "External id": 983203,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 5794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940434977.835, "dur": 3.850, + "args": { + "External id": 983204,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 5795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940435105.853, "dur": 38630.208, + "args": { + "External id": 983205,"Record function id": 0, "Sequence number": 10552694, "Fwd thread id": 1, "Ev Idx": 5796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940435109.821, "dur": 38608.852, + "args": { + "External id": 983206,"Sequence number": 10552694, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5797 + } + }, + { + "ph": "f", "id": 227, "pid": 2338710, "tid": 2379450, "ts": 6345940435109.821, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940435156.708, "dur": 6.462, + "args": { + "External id": 983207,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345940435167.206, "dur": 38184.615, + "args": { + "External id": 983208,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345940435171.322, "dur": 38180.029, + "args": { + "External id": 983209,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 5800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940435176.195, "dur": 8.980, + "args": { + "External id": 983210,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940435187.457, "dur": 38162.169, + "args": { + "External id": 983211,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 5802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 2338710, "tid": 2379450, + "ts": 6345940473358.968, "dur": 0.771, + "args": { + "External id": 983212,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 2338710, "tid": 2379450, + "ts": 6345940473363.865, "dur": 4.551, + "args": { + "External id": 983213,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 2338710, "tid": 2379450, + "ts": 6345940473365.558, "dur": 2.449, + "args": { + "External id": 983214,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338710, "tid": 2379450, + "ts": 6345940473377.432, "dur": 46.175, + "args": { + "External id": 983215,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 5806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338710, "tid": 2379450, + "ts": 6345940473435.143, "dur": 57.725, + "args": { + "External id": 983216,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338710, "tid": 2379450, + "ts": 6345940473437.647, "dur": 54.972, + "args": { + "External id": 983217,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338710, "tid": 2379450, + "ts": 6345940473440.088, "dur": 52.191, + "args": { + "External id": 983218,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940473757.110, "dur": 26.701, + "args": { + "External id": 983219,"Record function id": 0, "Sequence number": 10552693, "Fwd thread id": 1, "Ev Idx": 5810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940473760.985, "dur": 19.776, + "args": { + "External id": 983220,"Sequence number": 10552693, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 5811 + } + }, + { + "ph": "f", "id": 228, "pid": 2338710, "tid": 2379450, "ts": 6345940473760.985, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940473768.413, "dur": 12.048, + "args": { + "External id": 983221,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 5812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940473771.940, "dur": 7.707, + "args": { + "External id": 983222,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 5813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940473789.698, "dur": 141.186, + "args": { + "External id": 983223,"Record function id": 0, "Sequence number": 10552692, "Fwd thread id": 1, "Ev Idx": 5814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940473791.017, "dur": 131.070, + "args": { + "External id": 983224,"Sequence number": 10552692, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5815 + } + }, + { + "ph": "f", "id": 229, "pid": 2338710, "tid": 2379450, "ts": 6345940473791.017, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345940473796.627, "dur": 124.714, + "args": { + "External id": 983225,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 5816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338710, "tid": 2379450, + "ts": 6345940473804.943, "dur": 49.036, + "args": { + "External id": 983226,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940473810.886, "dur": 7.620, + "args": { + "External id": 983227,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2379450, + "ts": 6345940473821.651, "dur": 30.866, + "args": { + "External id": 983228,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2379450, + "ts": 6345940473826.263, "dur": 25.681, + "args": { + "External id": 983229,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 5820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345940473856.941, "dur": 8.868, + "args": { + "External id": 983230,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 5821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940473863.175, "dur": 2.125, + "args": { + "External id": 983231,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[16777216, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 5822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940473868.500, "dur": 50.620, + "args": { + "External id": 983232,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 5823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940473937.955, "dur": 110.243, + "args": { + "External id": 983233,"Record function id": 0, "Sequence number": 10552691, "Fwd thread id": 1, "Ev Idx": 5824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940473939.554, "dur": 102.597, + "args": { + "External id": 983234,"Sequence number": 10552691, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5825 + } + }, + { + "ph": "f", "id": 230, "pid": 2338710, "tid": 2379450, "ts": 6345940473939.554, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345940473945.180, "dur": 96.533, + "args": { + "External id": 983235,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "2", "3"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 5826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338710, "tid": 2379450, + "ts": 6345940473950.523, "dur": 31.043, + "args": { + "External id": 983236,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940473952.018, "dur": 4.486, + "args": { + "External id": 983237,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2379450, + "ts": 6345940473957.681, "dur": 23.505, + "args": { + "External id": 983238,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2379450, + "ts": 6345940473963.547, "dur": 17.186, + "args": { + "External id": 983239,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 5830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338710, "tid": 2379450, + "ts": 6345940473984.263, "dur": 9.051, + "args": { + "External id": 983240,"Record function id": 0, "Concrete Inputs": ["", "2", "3"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 5831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940473989.760, "dur": 1.524, + "args": { + "External id": 983241,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "12288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 5832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940473994.414, "dur": 46.113, + "args": { + "External id": 983242,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 5833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940474095.034, "dur": 258.454, + "args": { + "External id": 983243,"Record function id": 0, "Sequence number": 10552690, "Fwd thread id": 1, "Ev Idx": 5834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940474097.502, "dur": 250.159, + "args": { + "External id": 983244,"Sequence number": 10552690, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5835 + } + }, + { + "ph": "f", "id": 231, "pid": 2338710, "tid": 2379450, "ts": 6345940474097.502, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345940474102.228, "dur": 244.688, + "args": { + "External id": 983245,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 5836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338710, "tid": 2379450, + "ts": 6345940474105.540, "dur": 31.748, + "args": { + "External id": 983246,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940474107.852, "dur": 3.961, + "args": { + "External id": 983247,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2379450, + "ts": 6345940474113.491, "dur": 23.483, + "args": { + "External id": 983248,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2379450, + "ts": 6345940474118.048, "dur": 18.400, + "args": { + "External id": 983249,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 5840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345940474138.649, "dur": 6.180, + "args": { + "External id": 983250,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 5841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940474143.256, "dur": 1.150, + "args": { + "External id": 983251,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 5842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940474146.146, "dur": 199.197, + "args": { + "External id": 983252,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 5843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940474361.641, "dur": 117.172, + "args": { + "External id": 983253,"Record function id": 0, "Sequence number": 10552689, "Fwd thread id": 1, "Ev Idx": 5844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940474362.928, "dur": 109.285, + "args": { + "External id": 983254,"Sequence number": 10552689, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5845 + } + }, + { + "ph": "f", "id": 232, "pid": 2338710, "tid": 2379450, "ts": 6345940474362.928, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345940474365.237, "dur": 106.399, + "args": { + "External id": 983255,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 5846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338710, "tid": 2379450, + "ts": 6345940474367.130, "dur": 24.118, + "args": { + "External id": 983256,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940474368.937, "dur": 2.596, + "args": { + "External id": 983257,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2379450, + "ts": 6345940474372.385, "dur": 18.530, + "args": { + "External id": 983258,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2379450, + "ts": 6345940474376.755, "dur": 13.664, + "args": { + "External id": 983259,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 5850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345940474392.544, "dur": 7.603, + "args": { + "External id": 983260,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 5851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940474395.627, "dur": 4.285, + "args": { + "External id": 983261,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 5852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940474407.973, "dur": 62.392, + "args": { + "External id": 983262,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 5853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940474486.242, "dur": 46.115, + "args": { + "External id": 983263,"Record function id": 0, "Sequence number": 10552688, "Fwd thread id": 1, "Ev Idx": 5854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940474487.932, "dur": 1.380, + "args": { + "External id": 983264,"Sequence number": 10552688, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 5855 + } + }, + { + "ph": "f", "id": 233, "pid": 2338710, "tid": 2379450, "ts": 6345940474487.932, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345940474492.609, "dur": 35.663, + "args": { + "External id": 983265,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 5856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345940474496.096, "dur": 31.608, + "args": { + "External id": 983266,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 5857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940474504.085, "dur": 0.870, + "args": { + "External id": 983267,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 5858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940474539.122, "dur": 2286.719, + "args": { + "External id": 983268,"Record function id": 0, "Sequence number": 10552686, "Fwd thread id": 1, "Ev Idx": 5859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940474541.324, "dur": 2241.143, + "args": { + "External id": 983269,"Sequence number": 10552686, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5860 + } + }, + { + "ph": "f", "id": 234, "pid": 2338710, "tid": 2379450, "ts": 6345940474541.324, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940474588.155, "dur": 5.413, + "args": { + "External id": 983270,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345940474597.240, "dur": 1923.813, + "args": { + "External id": 983271,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345940474600.681, "dur": 1920.002, + "args": { + "External id": 983272,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 5863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940474605.088, "dur": 7.033, + "args": { + "External id": 983273,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940474613.270, "dur": 1906.043, + "args": { + "External id": 983274,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 5865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 2338710, "tid": 2379450, + "ts": 6345940476525.730, "dur": 0.435, + "args": { + "External id": 983275,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 2338710, "tid": 2379450, + "ts": 6345940476527.962, "dur": 6.120, + "args": { + "External id": 983276,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 2338710, "tid": 2379450, + "ts": 6345940476532.408, "dur": 1.501, + "args": { + "External id": 983277,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338710, "tid": 2379450, + "ts": 6345940476539.902, "dur": 32.875, + "args": { + "External id": 983278,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 5869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338710, "tid": 2379450, + "ts": 6345940476580.053, "dur": 44.744, + "args": { + "External id": 983279,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338710, "tid": 2379450, + "ts": 6345940476581.647, "dur": 42.871, + "args": { + "External id": 983280,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338710, "tid": 2379450, + "ts": 6345940476583.270, "dur": 40.824, + "args": { + "External id": 983281,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345940476796.836, "dur": 24.881, + "args": { + "External id": 983282,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 5873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940476838.701, "dur": 16.939, + "args": { + "External id": 983283,"Record function id": 0, "Sequence number": 10552685, "Fwd thread id": 1, "Ev Idx": 5874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940476840.436, "dur": 12.373, + "args": { + "External id": 983284,"Sequence number": 10552685, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 5875 + } + }, + { + "ph": "f", "id": 235, "pid": 2338710, "tid": 2379450, "ts": 6345940476840.436, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940476844.734, "dur": 7.797, + "args": { + "External id": 983285,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 5876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940476846.939, "dur": 5.329, + "args": { + "External id": 983286,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 5877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940476859.882, "dur": 93.550, + "args": { + "External id": 983287,"Record function id": 0, "Sequence number": 10552684, "Fwd thread id": 1, "Ev Idx": 5878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940476861.046, "dur": 86.894, + "args": { + "External id": 983288,"Sequence number": 10552684, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5879 + } + }, + { + "ph": "f", "id": 236, "pid": 2338710, "tid": 2379450, "ts": 6345940476861.046, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345940476863.969, "dur": 83.434, + "args": { + "External id": 983289,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 5880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338710, "tid": 2379450, + "ts": 6345940476868.103, "dur": 30.046, + "args": { + "External id": 983290,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940476874.028, "dur": 3.501, + "args": { + "External id": 983291,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2379450, + "ts": 6345940476878.581, "dur": 19.208, + "args": { + "External id": 983292,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2379450, + "ts": 6345940476880.823, "dur": 16.439, + "args": { + "External id": 983293,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 5884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345940476900.196, "dur": 6.241, + "args": { + "External id": 983294,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 5885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940476904.464, "dur": 1.486, + "args": { + "External id": 983295,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[16777216, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 5886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940476908.045, "dur": 38.228, + "args": { + "External id": 983296,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 5887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940476958.426, "dur": 127.827, + "args": { + "External id": 983297,"Record function id": 0, "Sequence number": 10552683, "Fwd thread id": 1, "Ev Idx": 5888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940476959.550, "dur": 88.229, + "args": { + "External id": 983298,"Sequence number": 10552683, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5889 + } + }, + { + "ph": "f", "id": 237, "pid": 2338710, "tid": 2379450, "ts": 6345940476959.550, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345940476962.597, "dur": 84.762, + "args": { + "External id": 983299,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "2", "2"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 5890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338710, "tid": 2379450, + "ts": 6345940476965.290, "dur": 21.486, + "args": { + "External id": 983300,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940476966.666, "dur": 2.770, + "args": { + "External id": 983301,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2379450, + "ts": 6345940476970.353, "dur": 16.090, + "args": { + "External id": 983302,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2379450, + "ts": 6345940476971.518, "dur": 14.456, + "args": { + "External id": 983303,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 5894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338710, "tid": 2379450, + "ts": 6345940476997.854, "dur": 7.941, + "args": { + "External id": 983304,"Record function id": 0, "Concrete Inputs": ["", "2", "2"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 5895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940477003.898, "dur": 1.225, + "args": { + "External id": 983305,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "8192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 5896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940477006.767, "dur": 39.536, + "args": { + "External id": 983306,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 5897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940477096.114, "dur": 163.285, + "args": { + "External id": 983307,"Record function id": 0, "Sequence number": 10552682, "Fwd thread id": 1, "Ev Idx": 5898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940477097.880, "dur": 155.517, + "args": { + "External id": 983308,"Sequence number": 10552682, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5899 + } + }, + { + "ph": "f", "id": 238, "pid": 2338710, "tid": 2379450, "ts": 6345940477097.880, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345940477100.843, "dur": 152.101, + "args": { + "External id": 983309,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 5900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338710, "tid": 2379450, + "ts": 6345940477103.049, "dur": 28.165, + "args": { + "External id": 983310,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940477104.950, "dur": 3.561, + "args": { + "External id": 983311,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2379450, + "ts": 6345940477109.503, "dur": 21.374, + "args": { + "External id": 983312,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2379450, + "ts": 6345940477110.373, "dur": 20.004, + "args": { + "External id": 983313,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 5904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345940477132.549, "dur": 7.188, + "args": { + "External id": 983314,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 5905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940477135.877, "dur": 3.528, + "args": { + "External id": 983315,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 5906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940477142.578, "dur": 109.132, + "args": { + "External id": 983316,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 5907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940477267.397, "dur": 136.689, + "args": { + "External id": 983317,"Record function id": 0, "Sequence number": 10552681, "Fwd thread id": 1, "Ev Idx": 5908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940477268.463, "dur": 110.670, + "args": { + "External id": 983318,"Sequence number": 10552681, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5909 + } + }, + { + "ph": "f", "id": 239, "pid": 2338710, "tid": 2379450, "ts": 6345940477268.463, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345940477270.977, "dur": 107.779, + "args": { + "External id": 983319,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 5910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338710, "tid": 2379450, + "ts": 6345940477273.184, "dur": 23.426, + "args": { + "External id": 983320,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940477275.466, "dur": 2.442, + "args": { + "External id": 983321,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2379450, + "ts": 6345940477278.682, "dur": 17.621, + "args": { + "External id": 983322,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2379450, + "ts": 6345940477280.044, "dur": 15.827, + "args": { + "External id": 983323,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 5914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345940477297.837, "dur": 3.386, + "args": { + "External id": 983324,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 5915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940477300.277, "dur": 0.666, + "args": { + "External id": 983325,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 5916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940477304.455, "dur": 73.303, + "args": { + "External id": 983326,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 5917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345940477383.958, "dur": 18.425, + "args": { + "External id": 983327,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 5918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940477409.764, "dur": 40.482, + "args": { + "External id": 983328,"Record function id": 0, "Sequence number": 10552680, "Fwd thread id": 1, "Ev Idx": 5919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940477411.341, "dur": 1.480, + "args": { + "External id": 983329,"Sequence number": 10552680, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 5920 + } + }, + { + "ph": "f", "id": 240, "pid": 2338710, "tid": 2379450, "ts": 6345940477411.341, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345940477415.766, "dur": 31.204, + "args": { + "External id": 983330,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 5921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345940477418.796, "dur": 27.577, + "args": { + "External id": 983331,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 5922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940477425.452, "dur": 0.738, + "args": { + "External id": 983332,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 5923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940477456.701, "dur": 3371.417, + "args": { + "External id": 983333,"Record function id": 0, "Sequence number": 10552678, "Fwd thread id": 1, "Ev Idx": 5924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940477458.618, "dur": 3331.903, + "args": { + "External id": 983334,"Sequence number": 10552678, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5925 + } + }, + { + "ph": "f", "id": 241, "pid": 2338710, "tid": 2379450, "ts": 6345940477458.618, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940477499.018, "dur": 3.325, + "args": { + "External id": 983335,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345940477505.369, "dur": 3029.176, + "args": { + "External id": 983336,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345940477507.283, "dur": 3026.885, + "args": { + "External id": 983337,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 5928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940477511.518, "dur": 4.293, + "args": { + "External id": 983338,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940477517.178, "dur": 3015.970, + "args": { + "External id": 983339,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 5930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 2338710, "tid": 2379450, + "ts": 6345940480539.164, "dur": 0.476, + "args": { + "External id": 983340,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 2338710, "tid": 2379450, + "ts": 6345940480541.448, "dur": 5.772, + "args": { + "External id": 983341,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 2338710, "tid": 2379450, + "ts": 6345940480545.907, "dur": 1.129, + "args": { + "External id": 983342,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338710, "tid": 2379450, + "ts": 6345940480553.239, "dur": 29.073, + "args": { + "External id": 983343,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 5934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338710, "tid": 2379450, + "ts": 6345940480589.057, "dur": 45.905, + "args": { + "External id": 983344,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338710, "tid": 2379450, + "ts": 6345940480590.607, "dur": 44.067, + "args": { + "External id": 983345,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338710, "tid": 2379450, + "ts": 6345940480592.131, "dur": 42.177, + "args": { + "External id": 983346,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345940480803.884, "dur": 19.504, + "args": { + "External id": 983347,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 5938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940480842.468, "dur": 15.663, + "args": { + "External id": 983348,"Record function id": 0, "Sequence number": 10552677, "Fwd thread id": 1, "Ev Idx": 5939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940480844.102, "dur": 11.540, + "args": { + "External id": 983349,"Sequence number": 10552677, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 5940 + } + }, + { + "ph": "f", "id": 242, "pid": 2338710, "tid": 2379450, "ts": 6345940480844.102, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940480848.180, "dur": 7.174, + "args": { + "External id": 983350,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 5941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940480850.394, "dur": 4.798, + "args": { + "External id": 983351,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 5942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940480862.493, "dur": 87.434, + "args": { + "External id": 983352,"Record function id": 0, "Sequence number": 10552676, "Fwd thread id": 1, "Ev Idx": 5943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940480863.897, "dur": 80.750, + "args": { + "External id": 983353,"Sequence number": 10552676, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5944 + } + }, + { + "ph": "f", "id": 243, "pid": 2338710, "tid": 2379450, "ts": 6345940480863.897, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345940480866.914, "dur": 77.215, + "args": { + "External id": 983354,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 5945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338710, "tid": 2379450, + "ts": 6345940480871.108, "dur": 30.801, + "args": { + "External id": 983355,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940480876.838, "dur": 3.614, + "args": { + "External id": 983356,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2379450, + "ts": 6345940480882.056, "dur": 19.381, + "args": { + "External id": 983357,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2379450, + "ts": 6345940480886.866, "dur": 14.016, + "args": { + "External id": 983358,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 5949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345940480904.170, "dur": 8.449, + "args": { + "External id": 983359,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 5950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940480908.107, "dur": 4.024, + "args": { + "External id": 983360,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[16777216, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 5951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940480914.176, "dur": 28.702, + "args": { + "External id": 983361,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 5952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940480954.907, "dur": 83.099, + "args": { + "External id": 983362,"Record function id": 0, "Sequence number": 10552675, "Fwd thread id": 1, "Ev Idx": 5953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940480956.203, "dur": 76.671, + "args": { + "External id": 983363,"Sequence number": 10552675, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5954 + } + }, + { + "ph": "f", "id": 244, "pid": 2338710, "tid": 2379450, "ts": 6345940480956.203, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345940480958.685, "dur": 73.738, + "args": { + "External id": 983364,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "2", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 5955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338710, "tid": 2379450, + "ts": 6345940480961.625, "dur": 23.651, + "args": { + "External id": 983365,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940480963.042, "dur": 2.909, + "args": { + "External id": 983366,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2379450, + "ts": 6345940480970.555, "dur": 14.393, + "args": { + "External id": 983367,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2379450, + "ts": 6345940480971.789, "dur": 12.629, + "args": { + "External id": 983368,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 5959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338710, "tid": 2379450, + "ts": 6345940480986.817, "dur": 7.033, + "args": { + "External id": 983369,"Record function id": 0, "Concrete Inputs": ["", "2", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 5960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940480991.968, "dur": 1.183, + "args": { + "External id": 983370,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "4096"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 5961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940480994.915, "dur": 36.465, + "args": { + "External id": 983371,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 5962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940481045.438, "dur": 184.676, + "args": { + "External id": 983372,"Record function id": 0, "Sequence number": 10552674, "Fwd thread id": 1, "Ev Idx": 5963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940481047.239, "dur": 176.267, + "args": { + "External id": 983373,"Sequence number": 10552674, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5964 + } + }, + { + "ph": "f", "id": 245, "pid": 2338710, "tid": 2379450, "ts": 6345940481047.239, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345940481049.801, "dur": 173.259, + "args": { + "External id": 983374,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 5965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338710, "tid": 2379450, + "ts": 6345940481086.816, "dur": 30.465, + "args": { + "External id": 983375,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940481089.147, "dur": 4.293, + "args": { + "External id": 983376,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2379450, + "ts": 6345940481094.664, "dur": 22.278, + "args": { + "External id": 983377,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2379450, + "ts": 6345940481098.516, "dur": 18.022, + "args": { + "External id": 983378,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 5969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345940481118.784, "dur": 4.183, + "args": { + "External id": 983379,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 5970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940481121.882, "dur": 0.714, + "args": { + "External id": 983380,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 5971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940481123.947, "dur": 98.142, + "args": { + "External id": 983381,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 5972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940481239.110, "dur": 131.852, + "args": { + "External id": 983382,"Record function id": 0, "Sequence number": 10552673, "Fwd thread id": 1, "Ev Idx": 5973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940481240.535, "dur": 100.316, + "args": { + "External id": 983383,"Sequence number": 10552673, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5974 + } + }, + { + "ph": "f", "id": 246, "pid": 2338710, "tid": 2379450, "ts": 6345940481240.535, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345940481242.951, "dur": 97.427, + "args": { + "External id": 983384,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 5975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338710, "tid": 2379450, + "ts": 6345940481244.966, "dur": 21.213, + "args": { + "External id": 983385,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940481246.797, "dur": 2.418, + "args": { + "External id": 983386,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2379450, + "ts": 6345940481250.326, "dur": 15.538, + "args": { + "External id": 983387,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2379450, + "ts": 6345940481251.355, "dur": 14.065, + "args": { + "External id": 983388,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 5979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345940481269.982, "dur": 4.528, + "args": { + "External id": 983389,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 5980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940481273.140, "dur": 1.048, + "args": { + "External id": 983390,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 5981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940481275.219, "dur": 64.192, + "args": { + "External id": 983391,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 5982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345940481347.293, "dur": 21.621, + "args": { + "External id": 983392,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 5983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940481376.848, "dur": 52.338, + "args": { + "External id": 983393,"Record function id": 0, "Sequence number": 10552672, "Fwd thread id": 1, "Ev Idx": 5984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940481379.010, "dur": 1.376, + "args": { + "External id": 983394,"Sequence number": 10552672, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 5985 + } + }, + { + "ph": "f", "id": 247, "pid": 2338710, "tid": 2379450, "ts": 6345940481379.010, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345940481383.042, "dur": 39.779, + "args": { + "External id": 983395,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 5986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345940481385.634, "dur": 36.646, + "args": { + "External id": 983396,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 5987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940481392.451, "dur": 5.959, + "args": { + "External id": 983397,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 5988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940481436.125, "dur": 3382.231, + "args": { + "External id": 983398,"Record function id": 0, "Sequence number": 10552671, "Fwd thread id": 1, "Ev Idx": 5989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940481450.033, "dur": 3331.427, + "args": { + "External id": 983399,"Sequence number": 10552671, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5990 + } + }, + { + "ph": "f", "id": 248, "pid": 2338710, "tid": 2379450, "ts": 6345940481450.033, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940481485.133, "dur": 3.877, + "args": { + "External id": 983400,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345940481491.816, "dur": 3048.476, + "args": { + "External id": 983401,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345940481493.980, "dur": 3046.048, + "args": { + "External id": 983402,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 5993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940481497.957, "dur": 3.939, + "args": { + "External id": 983403,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940481503.100, "dur": 3035.626, + "args": { + "External id": 983404,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 5995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 2338710, "tid": 2379450, + "ts": 6345940484544.658, "dur": 0.388, + "args": { + "External id": 983405,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 2338710, "tid": 2379450, + "ts": 6345940484546.873, "dur": 5.864, + "args": { + "External id": 983406,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 2338710, "tid": 2379450, + "ts": 6345940484551.460, "dur": 1.135, + "args": { + "External id": 983407,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338710, "tid": 2379450, + "ts": 6345940484557.743, "dur": 25.595, + "args": { + "External id": 983408,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 5999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338710, "tid": 2379450, + "ts": 6345940484589.402, "dur": 45.612, + "args": { + "External id": 983409,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 6000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338710, "tid": 2379450, + "ts": 6345940484590.877, "dur": 43.913, + "args": { + "External id": 983410,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 6001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338710, "tid": 2379450, + "ts": 6345940484592.642, "dur": 41.662, + "args": { + "External id": 983411,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 6002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345940484795.005, "dur": 18.409, + "args": { + "External id": 983412,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 6003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940484835.302, "dur": 15.998, + "args": { + "External id": 983413,"Record function id": 0, "Ev Idx": 6004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940484838.657, "dur": 10.671, + "args": { + "External id": 983414,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 6005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940484842.438, "dur": 5.712, + "args": { + "External id": 983415,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 6006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940484843.776, "dur": 4.245, + "args": { + "External id": 983416,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 6007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940484855.461, "dur": 18.130, + "args": { + "External id": 983417,"Record function id": 0, "Sequence number": 10552670, "Fwd thread id": 1, "Ev Idx": 6008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940484856.895, "dur": 14.068, + "args": { + "External id": 983418,"Sequence number": 10552670, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6009 + } + }, + { + "ph": "f", "id": 249, "pid": 2338710, "tid": 2379450, "ts": 6345940484856.895, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940484861.600, "dur": 9.074, + "args": { + "External id": 983419,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940484866.568, "dur": 3.882, + "args": { + "External id": 983420,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940484877.698, "dur": 81.561, + "args": { + "External id": 983421,"Record function id": 0, "Sequence number": 10552669, "Fwd thread id": 1, "Ev Idx": 6012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940484878.969, "dur": 75.548, + "args": { + "External id": 983422,"Sequence number": 10552669, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6013 + } + }, + { + "ph": "f", "id": 250, "pid": 2338710, "tid": 2379450, "ts": 6345940484878.969, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345940484882.182, "dur": 71.894, + "args": { + "External id": 983423,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 6014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338710, "tid": 2379450, + "ts": 6345940484885.912, "dur": 30.425, + "args": { + "External id": 983424,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 6015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940484888.391, "dur": 3.637, + "args": { + "External id": 983425,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2379450, + "ts": 6345940484893.097, "dur": 22.887, + "args": { + "External id": 983426,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2379450, + "ts": 6345940484895.388, "dur": 19.866, + "args": { + "External id": 983427,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345940484917.906, "dur": 5.011, + "args": { + "External id": 983428,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 6019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940484921.085, "dur": 1.401, + "args": { + "External id": 983429,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[16777216, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 6020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940484924.800, "dur": 28.214, + "args": { + "External id": 983430,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940484964.390, "dur": 83.994, + "args": { + "External id": 983431,"Record function id": 0, "Sequence number": 10552668, "Fwd thread id": 1, "Ev Idx": 6022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940484968.301, "dur": 75.717, + "args": { + "External id": 983432,"Sequence number": 10552668, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6023 + } + }, + { + "ph": "f", "id": 251, "pid": 2338710, "tid": 2379450, "ts": 6345940484968.301, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345940484971.440, "dur": 72.254, + "args": { + "External id": 983433,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "2", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 6024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338710, "tid": 2379450, + "ts": 6345940484973.764, "dur": 24.891, + "args": { + "External id": 983434,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 6025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940484975.557, "dur": 5.207, + "args": { + "External id": 983435,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2379450, + "ts": 6345940484981.645, "dur": 16.676, + "args": { + "External id": 983436,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 6027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2379450, + "ts": 6345940484982.807, "dur": 15.055, + "args": { + "External id": 983437,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 6028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338710, "tid": 2379450, + "ts": 6345940485000.123, "dur": 6.443, + "args": { + "External id": 983438,"Record function id": 0, "Concrete Inputs": ["", "2", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 6029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940485004.956, "dur": 1.052, + "args": { + "External id": 983439,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 6030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940485021.899, "dur": 20.849, + "args": { + "External id": 983440,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940485094.083, "dur": 154.954, + "args": { + "External id": 983441,"Record function id": 0, "Sequence number": 10552667, "Fwd thread id": 1, "Ev Idx": 6032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940485097.197, "dur": 146.520, + "args": { + "External id": 983442,"Sequence number": 10552667, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 6033 + } + }, + { + "ph": "f", "id": 252, "pid": 2338710, "tid": 2379450, "ts": 6345940485097.197, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345940485099.780, "dur": 143.240, + "args": { + "External id": 983443,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 6034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338710, "tid": 2379450, + "ts": 6345940485104.744, "dur": 33.188, + "args": { + "External id": 983444,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 6035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940485106.704, "dur": 3.920, + "args": { + "External id": 983445,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2379450, + "ts": 6345940485117.321, "dur": 20.306, + "args": { + "External id": 983446,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 6037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2379450, + "ts": 6345940485120.964, "dur": 16.124, + "args": { + "External id": 983447,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 6038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345940485139.460, "dur": 4.090, + "args": { + "External id": 983448,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 6039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940485142.269, "dur": 0.932, + "args": { + "External id": 983449,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 6040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940485144.463, "dur": 97.234, + "args": { + "External id": 983450,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 6041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940485258.737, "dur": 139.542, + "args": { + "External id": 983451,"Record function id": 0, "Sequence number": 10552666, "Fwd thread id": 1, "Ev Idx": 6042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940485259.948, "dur": 111.499, + "args": { + "External id": 983452,"Sequence number": 10552666, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 6043 + } + }, + { + "ph": "f", "id": 253, "pid": 2338710, "tid": 2379450, "ts": 6345940485259.948, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345940485262.532, "dur": 108.463, + "args": { + "External id": 983453,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 6044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338710, "tid": 2379450, + "ts": 6345940485264.719, "dur": 25.206, + "args": { + "External id": 983454,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 6045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940485266.223, "dur": 2.741, + "args": { + "External id": 983455,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2379450, + "ts": 6345940485272.652, "dur": 16.950, + "args": { + "External id": 983456,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 6047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2379450, + "ts": 6345940485273.879, "dur": 15.239, + "args": { + "External id": 983457,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 6048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345940485291.153, "dur": 3.488, + "args": { + "External id": 983458,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 6049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940485293.921, "dur": 0.485, + "args": { + "External id": 983459,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 6050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940485295.739, "dur": 74.180, + "args": { + "External id": 983460,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 6051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345940485378.453, "dur": 17.209, + "args": { + "External id": 983461,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 6052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940485406.376, "dur": 395.409, + "args": { + "External id": 983462,"Record function id": 0, "Sequence number": 10552665, "Fwd thread id": 1, "Ev Idx": 6053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940485408.171, "dur": 384.329, + "args": { + "External id": 983463,"Sequence number": 10552665, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 6054 + } + }, + { + "ph": "f", "id": 254, "pid": 2338710, "tid": 2379450, "ts": 6345940485408.171, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345940485598.680, "dur": 50.446, + "args": { + "External id": 983464,"kernel_hash": "c2hiad6vohc5juoazzm5elv5p6zjifqe63zwhygqq3luayunx4no", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "131072", "4096", "1", "993", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2h/c2hiad6vohc5juoazzm5elv5p6zjifqe63zwhygqq3luayunx4no.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[131072, 4096], [4096], [131072, 4096], [131072, 4096], [132, 4096], [131072], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_0", "pid": 2338710, "tid": 2379450, + "ts": 6345940485688.889, "dur": 28.537, + "args": { + "External id": 983465,"kernel_hash": "c6346guyxknvaslrs2ei3ec2tw4wgztd2bkmy2hswhmzwewgk7bb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/63/c6346guyxknvaslrs2ei3ec2tw4wgztd2bkmy2hswhmzwewgk7bb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 6056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_1", "pid": 2338710, "tid": 2379450, + "ts": 6345940485740.103, "dur": 27.195, + "args": { + "External id": 983466,"kernel_hash": "cey5irmf4ovxj63ncyq4qfgp27xz4mzqhlhvmoqbijvnsrskn4ba", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/ey/cey5irmf4ovxj63ncyq4qfgp27xz4mzqhlhvmoqbijvnsrskn4ba.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 6057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940485811.260, "dur": 12.377, + "args": { + "External id": 983467,"Record function id": 0, "Ev Idx": 6058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940485813.408, "dur": 9.138, + "args": { + "External id": 983468,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940485816.786, "dur": 4.721, + "args": { + "External id": 983469,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940485818.182, "dur": 3.194, + "args": { + "External id": 983470,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: StackBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940485828.130, "dur": 42.349, + "args": { + "External id": 983471,"Record function id": 0, "Sequence number": 10552664, "Fwd thread id": 1, "Ev Idx": 6062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "StackBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940485829.265, "dur": 32.845, + "args": { + "External id": 983472,"Sequence number": 10552664, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 6063 + } + }, + { + "ph": "f", "id": 255, "pid": 2338710, "tid": 2379450, "ts": 6345940485829.265, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338710, "tid": 2379450, + "ts": 6345940485831.810, "dur": 10.381, + "args": { + "External id": 983473,"Record function id": 0, "Concrete Inputs": ["", "-2", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 6064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940485838.489, "dur": 1.068, + "args": { + "External id": 983474,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 6065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338710, "tid": 2379450, + "ts": 6345940485843.104, "dur": 4.610, + "args": { + "External id": 983475,"Record function id": 0, "Concrete Inputs": ["", "-2", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 6066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940485845.694, "dur": 0.631, + "args": { + "External id": 983476,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "4096"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 6067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338710, "tid": 2379450, + "ts": 6345940485848.710, "dur": 4.685, + "args": { + "External id": 983477,"Record function id": 0, "Concrete Inputs": ["", "-2", "2"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 6068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940485851.857, "dur": 0.605, + "args": { + "External id": 983478,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "8192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 6069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338710, "tid": 2379450, + "ts": 6345940485854.041, "dur": 7.223, + "args": { + "External id": 983479,"Record function id": 0, "Concrete Inputs": ["", "-2", "3"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 6070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940485857.026, "dur": 3.405, + "args": { + "External id": 983480,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "12288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 6071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940485875.151, "dur": 6.403, + "args": { + "External id": 983481,"Record function id": 0, "Sequence number": 10552663, "Fwd thread id": 1, "Ev Idx": 6072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940485876.474, "dur": 1.210, + "args": { + "External id": 983482,"Sequence number": 10552663, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6073 + } + }, + { + "ph": "f", "id": 256, "pid": 2338710, "tid": 2379450, "ts": 6345940485876.474, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940485886.657, "dur": 707.675, + "args": { + "External id": 983483,"Record function id": 0, "Sequence number": 10552662, "Fwd thread id": 1, "Ev Idx": 6074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940485887.881, "dur": 689.637, + "args": { + "External id": 983484,"Sequence number": 10552662, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6075 + } + }, + { + "ph": "f", "id": 257, "pid": 2338710, "tid": 2379450, "ts": 6345940485887.881, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940485934.050, "dur": 12.861, + "args": { + "External id": 983485,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338710, "tid": 2379450, + "ts": 6345940485941.220, "dur": 5.380, + "args": { + "External id": 983486,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]", "[16384, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[67108864, 16384, 1], [], []], "Input Dims": [[8, 4096, 4096], [], []], "Ev Idx": 6077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940485951.695, "dur": 9.577, + "args": { + "External id": 983487,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940485954.669, "dur": 5.605, + "args": { + "External id": 983488,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940485959.172, "dur": 0.850, + "args": { + "External id": 983489,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2379450, + "ts": 6345940485966.144, "dur": 230.908, + "args": { + "External id": 983490,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16384, 1], [1, 14336], []], "Input Dims": [[32768, 4096], [14336, 4096], []], "Ev Idx": 6081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940485967.855, "dur": 4.442, + "args": { + "External id": 983491,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 14336]], "Input Dims": [[14336, 4096]], "Ev Idx": 6082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940485968.806, "dur": 2.541, + "args": { + "External id": 983492,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 14336], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 6083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940485970.796, "dur": 0.392, + "args": { + "External id": 983493,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[14336, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 14336], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 6084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2379450, + "ts": 6345940485978.644, "dur": 217.439, + "args": { + "External id": 983494,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 6085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940485983.651, "dur": 210.331, + "args": { + "External id": 983495,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 6086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2379450, + "ts": 6345940486206.830, "dur": 7.629, + "args": { + "External id": 983496,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [58720256, 14336, 1]], "Input Dims": [[32768, 14336], [8, 4096, 14336]], "Ev Idx": 6087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940486210.197, "dur": 4.121, + "args": { + "External id": 983497,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 6088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940486256.690, "dur": 9.906, + "args": { + "External id": 983498,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940486268.705, "dur": 4.099, + "args": { + "External id": 983499,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940486274.165, "dur": 4.810, + "args": { + "External id": 983500,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940486328.689, "dur": 3.524, + "args": { + "External id": 983501,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940486330.245, "dur": 1.752, + "args": { + "External id": 983502,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 2338710, "tid": 2379450, + "ts": 6345940486362.804, "dur": 187.353, + "args": { + "External id": 983503,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[16384, 1], [14336, 1]], []], "Input Dims": [[], [[32768, 4096], [32768, 14336]], []], "Ev Idx": 6094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2379450, + "ts": 6345940486373.217, "dur": 8.975, + "args": { + "External id": 983504,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940486379.444, "dur": 1.252, + "args": { + "External id": 983505,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096, 1]", "[16384, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 6096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338710, "tid": 2379450, + "ts": 6345940486385.381, "dur": 9.373, + "args": { + "External id": 983506,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16384, 1, 1], []], "Input Dims": [[32768, 4096, 1], []], "Ev Idx": 6097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940486392.669, "dur": 0.542, + "args": { + "External id": 983507,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 32768]", "[1, 1, 16384]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1, 1], [], [], []], "Input Dims": [[32768, 4096, 1], [], [], []], "Ev Idx": 6098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2379450, + "ts": 6345940486396.964, "dur": 3.284, + "args": { + "External id": 983508,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 6099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940486399.147, "dur": 0.608, + "args": { + "External id": 983509,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 6100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338710, "tid": 2379450, + "ts": 6345940486401.666, "dur": 3.666, + "args": { + "External id": 983510,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 6101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940486403.993, "dur": 0.521, + "args": { + "External id": 983511,"Record function id": 0, "Concrete Inputs": ["", "[1, 14336, 32768]", "[1, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1, 1], [], [], []], "Input Dims": [[32768, 14336, 1], [], [], []], "Ev Idx": 6102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338710, "tid": 2379450, + "ts": 6345940486413.281, "dur": 4.326, + "args": { + "External id": 983512,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 16384], []], "Input Dims": [[4096, 1, 32768], []], "Ev Idx": 6103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940486416.565, "dur": 0.648, + "args": { + "External id": 983513,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768, 1]", "[1, 16384, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 16384], [], [], []], "Input Dims": [[4096, 1, 32768], [], [], []], "Ev Idx": 6104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940486419.401, "dur": 8.623, + "args": { + "External id": 983514,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 16384, 1], []], "Input Dims": [[4096, 32768, 1], []], "Ev Idx": 6105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338710, "tid": 2379450, + "ts": 6345940486425.468, "dur": 2.358, + "args": { + "External id": 983515,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]", "[4096, 1, 16384]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 16384, 1], [], []], "Input Dims": [[4096, 32768, 1], [], []], "Ev Idx": 6106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338710, "tid": 2379450, + "ts": 6345940486429.483, "dur": 5.550, + "args": { + "External id": 983516,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 14336], []], "Input Dims": [[1, 14336, 32768], []], "Ev Idx": 6107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940486431.776, "dur": 2.899, + "args": { + "External id": 983517,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 14336], [], [], []], "Input Dims": [[1, 14336, 32768], [], [], []], "Ev Idx": 6108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940486436.254, "dur": 4.003, + "args": { + "External id": 983518,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 6109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940486438.013, "dur": 2.087, + "args": { + "External id": 983519,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 6110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345940486442.891, "dur": 88.715, + "args": { + "External id": 983520,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1, 16384], [469762048, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336]], "Ev Idx": 6111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940486536.427, "dur": 1.397, + "args": { + "External id": 983521,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[1, 4096, 14336], []], "Ev Idx": 6112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338710, "tid": 2379450, + "ts": 6345940486539.383, "dur": 5.203, + "args": { + "External id": 983522,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 14336, 1], []], "Input Dims": [[4096, 1, 14336], []], "Ev Idx": 6113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940486542.880, "dur": 0.675, + "args": { + "External id": 983523,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336, 1]", "[14336, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 14336, 1], [], [], []], "Input Dims": [[4096, 1, 14336], [], [], []], "Ev Idx": 6114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940486547.461, "dur": 1.212, + "args": { + "External id": 983524,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 14336], []], "Input Dims": [[4096, 14336, 1], []], "Ev Idx": 6115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940486612.476, "dur": 12.573, + "args": { + "External id": 983525,"Record function id": 0, "Ev Idx": 6116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940486615.516, "dur": 8.678, + "args": { + "External id": 983526,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940486618.817, "dur": 4.145, + "args": { + "External id": 983527,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940486620.078, "dur": 2.719, + "args": { + "External id": 983528,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940486629.964, "dur": 12.255, + "args": { + "External id": 983529,"Record function id": 0, "Sequence number": 10552661, "Fwd thread id": 1, "Ev Idx": 6120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940486631.675, "dur": 8.130, + "args": { + "External id": 983530,"Sequence number": 10552661, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 6121 + } + }, + { + "ph": "f", "id": 258, "pid": 2338710, "tid": 2379450, "ts": 6345940486631.675, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940486634.486, "dur": 5.060, + "args": { + "External id": 983531,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940486638.392, "dur": 0.985, + "args": { + "External id": 983532,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940486646.620, "dur": 159.844, + "args": { + "External id": 983533,"Record function id": 0, "Sequence number": 10552660, "Fwd thread id": 1, "Ev Idx": 6124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940486647.561, "dur": 150.132, + "args": { + "External id": 983534,"Sequence number": 10552660, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6125 + } + }, + { + "ph": "f", "id": 259, "pid": 2338710, "tid": 2379450, "ts": 6345940486647.561, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940486652.488, "dur": 5.502, + "args": { + "External id": 983535,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940486654.287, "dur": 3.040, + "args": { + "External id": 983536,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 6127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940486656.543, "dur": 0.584, + "args": { + "External id": 983537,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 6128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940486659.606, "dur": 63.507, + "args": { + "External id": 983538,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 6129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940486727.275, "dur": 5.298, + "args": { + "External id": 983539,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940486728.027, "dur": 3.735, + "args": { + "External id": 983540,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 6131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940486730.326, "dur": 1.221, + "args": { + "External id": 983541,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 6132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940486734.413, "dur": 5.025, + "args": { + "External id": 983542,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940486735.728, "dur": 3.071, + "args": { + "External id": 983543,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940486738.013, "dur": 0.697, + "args": { + "External id": 983544,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940486743.837, "dur": 52.856, + "args": { + "External id": 983545,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 6136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940486812.308, "dur": 8.761, + "args": { + "External id": 983546,"Record function id": 0, "Sequence number": 10552659, "Fwd thread id": 1, "Ev Idx": 6137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940486813.736, "dur": 5.573, + "args": { + "External id": 983547,"Sequence number": 10552659, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6138 + } + }, + { + "ph": "f", "id": 260, "pid": 2338710, "tid": 2379450, "ts": 6345940486813.736, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940486816.024, "dur": 3.090, + "args": { + "External id": 983548,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940486817.579, "dur": 1.389, + "args": { + "External id": 983549,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940486825.532, "dur": 50.369, + "args": { + "External id": 983550,"Record function id": 0, "Sequence number": 10552658, "Fwd thread id": 1, "Ev Idx": 6141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940486826.498, "dur": 46.624, + "args": { + "External id": 983551,"Sequence number": 10552658, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6142 + } + }, + { + "ph": "f", "id": 261, "pid": 2338710, "tid": 2379450, "ts": 6345940486826.498, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940486827.526, "dur": 45.338, + "args": { + "External id": 983552,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940486866.661, "dur": 5.599, + "args": { + "External id": 988161,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940486871.613, "dur": 0.521, + "args": { + "External id": 988162,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940486880.993, "dur": 6.311, + "args": { + "External id": 988163,"Record function id": 0, "Ev Idx": 6146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940486882.790, "dur": 3.903, + "args": { + "External id": 988164,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940486884.121, "dur": 2.267, + "args": { + "External id": 988165,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940486884.800, "dur": 1.415, + "args": { + "External id": 988166,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940486891.059, "dur": 6.476, + "args": { + "External id": 988167,"Record function id": 0, "Sequence number": 10552657, "Fwd thread id": 1, "Ev Idx": 6150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940486892.040, "dur": 3.047, + "args": { + "External id": 988168,"Sequence number": 10552657, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 6151 + } + }, + { + "ph": "f", "id": 262, "pid": 2338710, "tid": 2379450, "ts": 6345940486892.040, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940486893.195, "dur": 1.716, + "args": { + "External id": 988169,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940486894.004, "dur": 0.736, + "args": { + "External id": 988170,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940486901.298, "dur": 138.762, + "args": { + "External id": 988171,"Record function id": 0, "Sequence number": 10552656, "Fwd thread id": 1, "Ev Idx": 6154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940486902.079, "dur": 126.135, + "args": { + "External id": 988172,"Sequence number": 10552656, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6155 + } + }, + { + "ph": "f", "id": 263, "pid": 2338710, "tid": 2379450, "ts": 6345940486902.079, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940486906.629, "dur": 3.765, + "args": { + "External id": 988173,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940486907.251, "dur": 2.586, + "args": { + "External id": 988174,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 6157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940486909.144, "dur": 0.576, + "args": { + "External id": 988175,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 6158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940486911.285, "dur": 41.944, + "args": { + "External id": 988176,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 6159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940486954.691, "dur": 6.303, + "args": { + "External id": 988177,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940486955.520, "dur": 4.724, + "args": { + "External id": 988178,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 6161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940486959.412, "dur": 0.688, + "args": { + "External id": 988179,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 6162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940486962.347, "dur": 5.511, + "args": { + "External id": 988180,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940486963.081, "dur": 4.174, + "args": { + "External id": 988181,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940486964.591, "dur": 2.582, + "args": { + "External id": 988182,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940486968.329, "dur": 38.750, + "args": { + "External id": 988183,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 6166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940487051.232, "dur": 94.001, + "args": { + "External id": 988184,"Record function id": 0, "Sequence number": 10552655, "Fwd thread id": 1, "Ev Idx": 6167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940487052.714, "dur": 55.103, + "args": { + "External id": 988185,"Sequence number": 10552655, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6168 + } + }, + { + "ph": "f", "id": 264, "pid": 2338710, "tid": 2379450, "ts": 6345940487052.714, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940487054.840, "dur": 52.724, + "args": { + "External id": 988186,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940487103.467, "dur": 3.809, + "args": { + "External id": 988187,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338710, "tid": 2379450, + "ts": 6345940487112.886, "dur": 28.668, + "args": { + "External id": 988188,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940487155.421, "dur": 10.739, + "args": { + "External id": 988189,"Record function id": 0, "Sequence number": 10552654, "Fwd thread id": 1, "Ev Idx": 6172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940487156.702, "dur": 7.037, + "args": { + "External id": 988190,"Sequence number": 10552654, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6173 + } + }, + { + "ph": "f", "id": 265, "pid": 2338710, "tid": 2379450, "ts": 6345940487156.702, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940487157.568, "dur": 5.887, + "args": { + "External id": 988191,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940487158.649, "dur": 4.077, + "args": { + "External id": 988192,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940487161.587, "dur": 0.907, + "args": { + "External id": 988193,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940487171.252, "dur": 6.678, + "args": { + "External id": 988194,"Record function id": 0, "Ev Idx": 6177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940487172.839, "dur": 4.444, + "args": { + "External id": 988195,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940487174.548, "dur": 2.377, + "args": { + "External id": 988196,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940487175.187, "dur": 1.600, + "args": { + "External id": 988197,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940487182.941, "dur": 504.571, + "args": { + "External id": 988198,"Record function id": 0, "Sequence number": 10552653, "Fwd thread id": 1, "Ev Idx": 6181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940487184.674, "dur": 489.715, + "args": { + "External id": 988199,"Sequence number": 10552653, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 6182 + } + }, + { + "ph": "f", "id": 266, "pid": 2338710, "tid": 2379450, "ts": 6345940487184.674, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338710, "tid": 2379450, + "ts": 6345940487216.651, "dur": 45.007, + "args": { + "External id": 988200,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338710, "tid": 2379450, + "ts": 6345940487218.356, "dur": 43.070, + "args": { + "External id": 988201,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2379450, + "ts": 6345940487221.927, "dur": 10.323, + "args": { + "External id": 988202,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 6185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940487227.757, "dur": 3.809, + "args": { + "External id": 988203,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940487233.928, "dur": 26.979, + "args": { + "External id": 988204,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940487276.409, "dur": 2.740, + "args": { + "External id": 988205,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940487277.475, "dur": 1.463, + "args": { + "External id": 988206,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940487284.513, "dur": 4.795, + "args": { + "External id": 988207,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940487288.267, "dur": 0.929, + "args": { + "External id": 988208,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940487304.104, "dur": 5.177, + "args": { + "External id": 988209,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940487323.072, "dur": 2.686, + "args": { + "External id": 988210,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940487540.606, "dur": 2.524, + "args": { + "External id": 988211,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 6194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345940487548.860, "dur": 42.769, + "args": { + "External id": 988212,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 6195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940487564.933, "dur": 0.947, + "args": { + "External id": 988213,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 6196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345940487598.947, "dur": 36.611, + "args": { + "External id": 988214,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 6197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345940487600.882, "dur": 34.330, + "args": { + "External id": 988215,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 6198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940487608.951, "dur": 5.291, + "args": { + "External id": 988216,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940487616.079, "dur": 18.390, + "args": { + "External id": 988217,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 6200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2379450, + "ts": 6345940487641.481, "dur": 2.842, + "args": { + "External id": 988218,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 6201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940487643.104, "dur": 1.092, + "args": { + "External id": 988219,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 6202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940487652.262, "dur": 2.762, + "args": { + "External id": 988220,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940487653.850, "dur": 1.078, + "args": { + "External id": 988221,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940487658.065, "dur": 5.908, + "args": { + "External id": 988222,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940487662.417, "dur": 1.455, + "args": { + "External id": 988223,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940487698.286, "dur": 9.031, + "args": { + "External id": 988224,"Record function id": 0, "Ev Idx": 6207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940487700.728, "dur": 5.749, + "args": { + "External id": 988225,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940487702.539, "dur": 2.939, + "args": { + "External id": 988226,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940487703.840, "dur": 1.496, + "args": { + "External id": 988227,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940487711.511, "dur": 11.385, + "args": { + "External id": 988228,"Record function id": 0, "Sequence number": 10552652, "Fwd thread id": 1, "Ev Idx": 6211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940487712.756, "dur": 6.713, + "args": { + "External id": 988229,"Sequence number": 10552652, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6212 + } + }, + { + "ph": "f", "id": 267, "pid": 2338710, "tid": 2379450, "ts": 6345940487712.756, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940487714.606, "dur": 4.652, + "args": { + "External id": 988230,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940487717.733, "dur": 1.399, + "args": { + "External id": 988231,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940487727.065, "dur": 169.299, + "args": { + "External id": 988232,"Record function id": 0, "Sequence number": 10552651, "Fwd thread id": 1, "Ev Idx": 6215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940487728.370, "dur": 162.742, + "args": { + "External id": 988233,"Sequence number": 10552651, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6216 + } + }, + { + "ph": "f", "id": 268, "pid": 2338710, "tid": 2379450, "ts": 6345940487728.370, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940487731.853, "dur": 5.142, + "args": { + "External id": 988234,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940487733.163, "dur": 3.129, + "args": { + "External id": 988235,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 6218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940487735.187, "dur": 0.914, + "args": { + "External id": 988236,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 6219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940487738.225, "dur": 87.248, + "args": { + "External id": 988237,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 6220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940487827.371, "dur": 8.102, + "args": { + "External id": 988238,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940487831.223, "dur": 3.534, + "args": { + "External id": 988239,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940487833.777, "dur": 0.762, + "args": { + "External id": 988240,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940487837.450, "dur": 6.174, + "args": { + "External id": 988241,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940487838.447, "dur": 4.519, + "args": { + "External id": 988242,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940487840.047, "dur": 2.808, + "args": { + "External id": 988243,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940487844.286, "dur": 45.857, + "args": { + "External id": 988244,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 6227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940487902.122, "dur": 9.249, + "args": { + "External id": 988245,"Record function id": 0, "Sequence number": 10552650, "Fwd thread id": 1, "Ev Idx": 6228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940487903.623, "dur": 5.084, + "args": { + "External id": 988246,"Sequence number": 10552650, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6229 + } + }, + { + "ph": "f", "id": 269, "pid": 2338710, "tid": 2379450, "ts": 6345940487903.623, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940487905.773, "dur": 2.732, + "args": { + "External id": 988247,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940487907.147, "dur": 1.235, + "args": { + "External id": 988248,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940487915.921, "dur": 13.624, + "args": { + "External id": 988249,"Record function id": 0, "Sequence number": 10552649, "Fwd thread id": 1, "Ev Idx": 6232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940487917.016, "dur": 10.317, + "args": { + "External id": 988250,"Sequence number": 10552649, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6233 + } + }, + { + "ph": "f", "id": 270, "pid": 2338710, "tid": 2379450, "ts": 6345940487917.016, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940487920.537, "dur": 6.499, + "args": { + "External id": 988251,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940487921.190, "dur": 5.194, + "args": { + "External id": 988252,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940487923.203, "dur": 2.940, + "args": { + "External id": 988253,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940487934.330, "dur": 6.323, + "args": { + "External id": 988254,"Record function id": 0, "Ev Idx": 6237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940487936.121, "dur": 3.922, + "args": { + "External id": 988255,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940487937.311, "dur": 2.443, + "args": { + "External id": 988256,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940487938.327, "dur": 1.290, + "args": { + "External id": 988257,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940487944.366, "dur": 8.424, + "args": { + "External id": 988258,"Record function id": 0, "Sequence number": 10552648, "Fwd thread id": 1, "Ev Idx": 6241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940487945.601, "dur": 4.677, + "args": { + "External id": 988259,"Sequence number": 10552648, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6242 + } + }, + { + "ph": "f", "id": 271, "pid": 2338710, "tid": 2379450, "ts": 6345940487945.601, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940487947.338, "dur": 2.784, + "args": { + "External id": 988260,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940487948.596, "dur": 1.377, + "args": { + "External id": 988261,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940487958.472, "dur": 528.965, + "args": { + "External id": 988262,"Record function id": 0, "Sequence number": 10552647, "Fwd thread id": 1, "Ev Idx": 6245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940487959.906, "dur": 506.236, + "args": { + "External id": 988263,"Sequence number": 10552647, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6246 + } + }, + { + "ph": "f", "id": 272, "pid": 2338710, "tid": 2379450, "ts": 6345940487959.906, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2379450, + "ts": 6345940487977.273, "dur": 8.190, + "args": { + "External id": 988264,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 6247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940487980.437, "dur": 4.285, + "args": { + "External id": 988265,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2379450, + "ts": 6345940487988.022, "dur": 5.355, + "args": { + "External id": 988266,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 6249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940487990.868, "dur": 2.269, + "args": { + "External id": 988267,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2379450, + "ts": 6345940487994.980, "dur": 8.593, + "args": { + "External id": 988268,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 6251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940487997.507, "dur": 5.855, + "args": { + "External id": 988269,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345940488112.333, "dur": 321.020, + "args": { + "External id": 988270,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 6253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940488236.402, "dur": 5.190, + "args": { + "External id": 988271,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940488244.545, "dur": 3.046, + "args": { + "External id": 988272,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940488249.209, "dur": 1.884, + "args": { + "External id": 988273,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940488252.537, "dur": 2.844, + "args": { + "External id": 988274,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940488315.458, "dur": 3.730, + "args": { + "External id": 988275,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940488316.740, "dur": 2.307, + "args": { + "External id": 988276,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345940488324.377, "dur": 37.491, + "args": { + "External id": 988277,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 6260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940488335.253, "dur": 1.285, + "args": { + "External id": 988278,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 6261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940488363.598, "dur": 1.778, + "args": { + "External id": 988279,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940488364.708, "dur": 0.551, + "args": { + "External id": 988280,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345940488366.662, "dur": 16.297, + "args": { + "External id": 988281,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 6264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940488369.287, "dur": 0.863, + "args": { + "External id": 988282,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 6265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338710, "tid": 2379450, + "ts": 6345940488449.870, "dur": 4.712, + "args": { + "External id": 988283,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338710, "tid": 2379450, + "ts": 6345940488458.420, "dur": 0.918, + "args": { + "External id": 988284,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338710, "tid": 2379450, + "ts": 6345940488461.798, "dur": 0.779, + "args": { + "External id": 988285,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940488501.976, "dur": 268.180, + "args": { + "External id": 988286,"Record function id": 0, "Sequence number": 10552646, "Fwd thread id": 1, "Ev Idx": 6269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940488503.995, "dur": 259.671, + "args": { + "External id": 988287,"Sequence number": 10552646, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6270 + } + }, + { + "ph": "f", "id": 273, "pid": 2338710, "tid": 2379450, "ts": 6345940488503.995, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338710, "tid": 2379450, + "ts": 6345940488528.338, "dur": 57.247, + "args": { + "External id": 988288,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940488532.941, "dur": 4.591, + "args": { + "External id": 988289,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940488539.327, "dur": 45.521, + "args": { + "External id": 988290,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], [8, 4096, 8, 128], []], "Ev Idx": 6273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2379450, + "ts": 6345940488597.375, "dur": 5.785, + "args": { + "External id": 988291,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 6274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940488599.951, "dur": 2.828, + "args": { + "External id": 988292,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940488778.169, "dur": 202.875, + "args": { + "External id": 988293,"Record function id": 0, "Sequence number": 10552645, "Fwd thread id": 1, "Ev Idx": 6276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940488780.391, "dur": 192.431, + "args": { + "External id": 988294,"Sequence number": 10552645, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6277 + } + }, + { + "ph": "f", "id": 274, "pid": 2338710, "tid": 2379450, "ts": 6345940488780.391, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338710, "tid": 2379450, + "ts": 6345940488793.535, "dur": 52.528, + "args": { + "External id": 988295,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940488796.779, "dur": 3.265, + "args": { + "External id": 988296,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940488801.321, "dur": 44.265, + "args": { + "External id": 988297,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], []], "Ev Idx": 6280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2379450, + "ts": 6345940488854.638, "dur": 6.555, + "args": { + "External id": 988298,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 6281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940488857.822, "dur": 3.036, + "args": { + "External id": 988299,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940488989.098, "dur": 14.457, + "args": { + "External id": 988300,"Record function id": 0, "Sequence number": 10552644, "Fwd thread id": 1, "Ev Idx": 6283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940488990.411, "dur": 9.667, + "args": { + "External id": 988301,"Sequence number": 10552644, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6284 + } + }, + { + "ph": "f", "id": 275, "pid": 2338710, "tid": 2379450, "ts": 6345940488990.411, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940488993.823, "dur": 5.990, + "args": { + "External id": 988302,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940488995.418, "dur": 4.207, + "args": { + "External id": 988303,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940489026.527, "dur": 11.366, + "args": { + "External id": 988304,"Record function id": 0, "Sequence number": 10552643, "Fwd thread id": 1, "Ev Idx": 6287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940489028.847, "dur": 6.203, + "args": { + "External id": 988305,"Sequence number": 10552643, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6288 + } + }, + { + "ph": "f", "id": 276, "pid": 2338710, "tid": 2379450, "ts": 6345940489028.847, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940489030.757, "dur": 4.080, + "args": { + "External id": 988306,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940489032.213, "dur": 2.343, + "args": { + "External id": 988307,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940489043.519, "dur": 66.618, + "args": { + "External id": 988308,"Record function id": 0, "Sequence number": 10552642, "Fwd thread id": 1, "Ev Idx": 6291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940489048.573, "dur": 57.970, + "args": { + "External id": 988309,"Sequence number": 10552642, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6292 + } + }, + { + "ph": "f", "id": 277, "pid": 2338710, "tid": 2379450, "ts": 6345940489048.573, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940489053.378, "dur": 52.915, + "args": { + "External id": 988310,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940489054.689, "dur": 49.866, + "args": { + "External id": 988311,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940489116.823, "dur": 10.993, + "args": { + "External id": 988312,"Record function id": 0, "Sequence number": 10552641, "Fwd thread id": 1, "Ev Idx": 6295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940489118.371, "dur": 6.906, + "args": { + "External id": 988313,"Sequence number": 10552641, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 6296 + } + }, + { + "ph": "f", "id": 278, "pid": 2338710, "tid": 2379450, "ts": 6345940489118.371, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940489120.006, "dur": 5.099, + "args": { + "External id": 988314,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940489121.075, "dur": 3.862, + "args": { + "External id": 988315,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940489131.852, "dur": 202.712, + "args": { + "External id": 988316,"Record function id": 0, "Sequence number": 10552640, "Fwd thread id": 1, "Ev Idx": 6299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940489133.080, "dur": 192.990, + "args": { + "External id": 988317,"Sequence number": 10552640, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6300 + } + }, + { + "ph": "f", "id": 279, "pid": 2338710, "tid": 2379450, "ts": 6345940489133.080, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940489137.719, "dur": 11.127, + "args": { + "External id": 988318,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940489143.150, "dur": 4.963, + "args": { + "External id": 988319,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 6302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940489146.023, "dur": 1.741, + "args": { + "External id": 988320,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 6303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940489150.693, "dur": 92.170, + "args": { + "External id": 988321,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 6304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940489244.347, "dur": 8.457, + "args": { + "External id": 988322,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940489245.626, "dur": 6.085, + "args": { + "External id": 988323,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 6306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940489247.583, "dur": 3.810, + "args": { + "External id": 988324,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 6307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940489257.644, "dur": 4.513, + "args": { + "External id": 988325,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940489258.934, "dur": 2.721, + "args": { + "External id": 988326,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940489260.828, "dur": 0.725, + "args": { + "External id": 988327,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940489262.852, "dur": 62.179, + "args": { + "External id": 988328,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 6311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940489340.627, "dur": 9.061, + "args": { + "External id": 988329,"Record function id": 0, "Sequence number": 10552639, "Fwd thread id": 1, "Ev Idx": 6312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940489342.013, "dur": 5.221, + "args": { + "External id": 988330,"Sequence number": 10552639, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6313 + } + }, + { + "ph": "f", "id": 280, "pid": 2338710, "tid": 2379450, "ts": 6345940489342.013, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940489344.285, "dur": 2.790, + "args": { + "External id": 988331,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940489345.548, "dur": 1.366, + "args": { + "External id": 988332,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940489354.464, "dur": 11.915, + "args": { + "External id": 988333,"Record function id": 0, "Sequence number": 10552638, "Fwd thread id": 1, "Ev Idx": 6316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940489355.494, "dur": 8.120, + "args": { + "External id": 988334,"Sequence number": 10552638, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6317 + } + }, + { + "ph": "f", "id": 281, "pid": 2338710, "tid": 2379450, "ts": 6345940489355.494, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940489359.357, "dur": 3.987, + "args": { + "External id": 988335,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940489360.420, "dur": 2.349, + "args": { + "External id": 988336,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940489362.112, "dur": 0.526, + "args": { + "External id": 988337,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940489373.482, "dur": 13.645, + "args": { + "External id": 988338,"Record function id": 0, "Ev Idx": 6321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940489375.510, "dur": 10.813, + "args": { + "External id": 988339,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940489378.597, "dur": 7.261, + "args": { + "External id": 988340,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940489380.272, "dur": 5.474, + "args": { + "External id": 988341,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940489390.938, "dur": 7.583, + "args": { + "External id": 988342,"Record function id": 0, "Sequence number": 10552637, "Fwd thread id": 1, "Ev Idx": 6325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940489392.122, "dur": 3.966, + "args": { + "External id": 988343,"Sequence number": 10552637, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 6326 + } + }, + { + "ph": "f", "id": 282, "pid": 2338710, "tid": 2379450, "ts": 6345940489392.122, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940489393.833, "dur": 2.097, + "args": { + "External id": 988344,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940489394.616, "dur": 1.192, + "args": { + "External id": 988345,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940489403.964, "dur": 107.381, + "args": { + "External id": 988346,"Record function id": 0, "Sequence number": 10552636, "Fwd thread id": 1, "Ev Idx": 6329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940489407.334, "dur": 96.887, + "args": { + "External id": 988347,"Sequence number": 10552636, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6330 + } + }, + { + "ph": "f", "id": 283, "pid": 2338710, "tid": 2379450, "ts": 6345940489407.334, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940489409.251, "dur": 3.726, + "args": { + "External id": 988348,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940489410.200, "dur": 2.267, + "args": { + "External id": 988349,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 6332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940489411.646, "dur": 0.676, + "args": { + "External id": 988350,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 6333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940489413.668, "dur": 30.702, + "args": { + "External id": 988351,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 6334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940489445.755, "dur": 11.417, + "args": { + "External id": 988352,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940489446.441, "dur": 9.994, + "args": { + "External id": 988353,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 6336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940489453.214, "dur": 3.055, + "args": { + "External id": 988354,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 6337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940489458.555, "dur": 3.574, + "args": { + "External id": 988355,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940489459.725, "dur": 1.902, + "args": { + "External id": 988356,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940489460.954, "dur": 0.561, + "args": { + "External id": 988357,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940489462.626, "dur": 40.583, + "args": { + "External id": 988358,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 6341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940489516.757, "dur": 40.718, + "args": { + "External id": 988359,"Record function id": 0, "Sequence number": 10552635, "Fwd thread id": 1, "Ev Idx": 6342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940489518.047, "dur": 5.349, + "args": { + "External id": 988360,"Sequence number": 10552635, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6343 + } + }, + { + "ph": "f", "id": 284, "pid": 2338710, "tid": 2379450, "ts": 6345940489518.047, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940489520.357, "dur": 2.864, + "args": { + "External id": 988361,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940489521.847, "dur": 1.235, + "args": { + "External id": 988362,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338710, "tid": 2379450, + "ts": 6345940489527.109, "dur": 27.316, + "args": { + "External id": 988363,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940489565.300, "dur": 8.793, + "args": { + "External id": 988364,"Record function id": 0, "Sequence number": 10552634, "Fwd thread id": 1, "Ev Idx": 6347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940489566.371, "dur": 5.708, + "args": { + "External id": 988365,"Sequence number": 10552634, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6348 + } + }, + { + "ph": "f", "id": 285, "pid": 2338710, "tid": 2379450, "ts": 6345940489566.371, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940489567.113, "dur": 4.735, + "args": { + "External id": 988366,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940489568.193, "dur": 2.947, + "args": { + "External id": 988367,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940489570.094, "dur": 0.914, + "args": { + "External id": 988368,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940489578.873, "dur": 6.549, + "args": { + "External id": 988369,"Record function id": 0, "Ev Idx": 6352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940489580.886, "dur": 4.003, + "args": { + "External id": 988370,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940489582.198, "dur": 2.298, + "args": { + "External id": 988371,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940489583.028, "dur": 1.349, + "args": { + "External id": 988372,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940489589.170, "dur": 8.463, + "args": { + "External id": 988373,"Record function id": 0, "Sequence number": 10552633, "Fwd thread id": 1, "Ev Idx": 6356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940489590.478, "dur": 5.083, + "args": { + "External id": 988374,"Sequence number": 10552633, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6357 + } + }, + { + "ph": "f", "id": 286, "pid": 2338710, "tid": 2379450, "ts": 6345940489590.478, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940489591.459, "dur": 3.916, + "args": { + "External id": 988375,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940489594.368, "dur": 0.899, + "args": { + "External id": 988376,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940489601.490, "dur": 113.705, + "args": { + "External id": 988377,"Record function id": 0, "Sequence number": 10552632, "Fwd thread id": 1, "Ev Idx": 6360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940489602.228, "dur": 104.477, + "args": { + "External id": 988378,"Sequence number": 10552632, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6361 + } + }, + { + "ph": "f", "id": 287, "pid": 2338710, "tid": 2379450, "ts": 6345940489602.228, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940489604.557, "dur": 2.881, + "args": { + "External id": 988379,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940489605.279, "dur": 1.660, + "args": { + "External id": 988380,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 6363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940489606.342, "dur": 0.459, + "args": { + "External id": 988381,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 6364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940489610.750, "dur": 39.876, + "args": { + "External id": 988382,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 6365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940489652.009, "dur": 5.145, + "args": { + "External id": 988383,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940489652.975, "dur": 3.452, + "args": { + "External id": 988384,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940489654.949, "dur": 1.334, + "args": { + "External id": 988385,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940489658.427, "dur": 7.175, + "args": { + "External id": 988386,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940489659.708, "dur": 5.187, + "args": { + "External id": 988387,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940489664.173, "dur": 0.645, + "args": { + "External id": 988388,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940489666.361, "dur": 39.587, + "args": { + "External id": 988389,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 6372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940489720.777, "dur": 28.357, + "args": { + "External id": 988390,"Record function id": 0, "Sequence number": 10552631, "Fwd thread id": 1, "Ev Idx": 6373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940489721.984, "dur": 5.212, + "args": { + "External id": 988391,"Sequence number": 10552631, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6374 + } + }, + { + "ph": "f", "id": 288, "pid": 2338710, "tid": 2379450, "ts": 6345940489721.984, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940489724.195, "dur": 2.836, + "args": { + "External id": 988392,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940489725.474, "dur": 1.446, + "args": { + "External id": 988393,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345940489729.953, "dur": 16.987, + "args": { + "External id": 988394,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940489753.364, "dur": 10.728, + "args": { + "External id": 988395,"Record function id": 0, "Sequence number": 10552630, "Fwd thread id": 1, "Ev Idx": 6378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940489754.337, "dur": 7.677, + "args": { + "External id": 988396,"Sequence number": 10552630, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6379 + } + }, + { + "ph": "f", "id": 289, "pid": 2338710, "tid": 2379450, "ts": 6345940489754.337, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940489755.283, "dur": 6.485, + "args": { + "External id": 988397,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940489756.533, "dur": 4.505, + "args": { + "External id": 988398,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940489760.002, "dur": 0.852, + "args": { + "External id": 988399,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940489768.993, "dur": 6.619, + "args": { + "External id": 988400,"Record function id": 0, "Ev Idx": 6383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940489770.488, "dur": 4.507, + "args": { + "External id": 988401,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940489771.882, "dur": 2.324, + "args": { + "External id": 988402,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940489772.684, "dur": 1.403, + "args": { + "External id": 988403,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940489780.609, "dur": 521.409, + "args": { + "External id": 988404,"Record function id": 0, "Sequence number": 10552629, "Fwd thread id": 1, "Ev Idx": 6387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940489782.241, "dur": 476.591, + "args": { + "External id": 988405,"Sequence number": 10552629, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6388 + } + }, + { + "ph": "f", "id": 290, "pid": 2338710, "tid": 2379450, "ts": 6345940489782.241, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940489820.994, "dur": 2.966, + "args": { + "External id": 988406,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940489822.208, "dur": 1.642, + "args": { + "External id": 988407,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940489842.262, "dur": 4.864, + "args": { + "External id": 988408,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940489858.298, "dur": 2.391, + "args": { + "External id": 988409,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940490125.564, "dur": 4.854, + "args": { + "External id": 988410,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 6393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345940490135.828, "dur": 48.122, + "args": { + "External id": 988411,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 6394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940490152.813, "dur": 1.110, + "args": { + "External id": 988412,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 6395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345940490191.168, "dur": 39.476, + "args": { + "External id": 988413,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 6396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345940490193.363, "dur": 37.041, + "args": { + "External id": 988414,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 6397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940490201.096, "dur": 5.719, + "args": { + "External id": 988415,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940490209.334, "dur": 20.354, + "args": { + "External id": 988416,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 6399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2379450, + "ts": 6345940490236.219, "dur": 3.498, + "args": { + "External id": 988417,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 6400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940490238.211, "dur": 1.335, + "args": { + "External id": 988418,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 6401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940490247.853, "dur": 2.483, + "args": { + "External id": 988419,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940490248.931, "dur": 1.257, + "args": { + "External id": 988420,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338710, "tid": 2379450, + "ts": 6345940490273.383, "dur": 20.534, + "args": { + "External id": 988421,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940490318.064, "dur": 10.849, + "args": { + "External id": 988422,"Record function id": 0, "Ev Idx": 6405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940490320.544, "dur": 7.444, + "args": { + "External id": 988423,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940490323.195, "dur": 3.667, + "args": { + "External id": 988424,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940490324.657, "dur": 2.066, + "args": { + "External id": 988425,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940490333.557, "dur": 7.081, + "args": { + "External id": 988426,"Record function id": 0, "Sequence number": 10552628, "Fwd thread id": 1, "Ev Idx": 6409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940490335.276, "dur": 1.579, + "args": { + "External id": 988427,"Sequence number": 10552628, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6410 + } + }, + { + "ph": "f", "id": 291, "pid": 2338710, "tid": 2379450, "ts": 6345940490335.276, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940490345.356, "dur": 485.263, + "args": { + "External id": 988428,"Record function id": 0, "Sequence number": 10552627, "Fwd thread id": 1, "Ev Idx": 6411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940490347.059, "dur": 469.787, + "args": { + "External id": 988429,"Sequence number": 10552627, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6412 + } + }, + { + "ph": "f", "id": 292, "pid": 2338710, "tid": 2379450, "ts": 6345940490347.059, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940490388.511, "dur": 10.117, + "args": { + "External id": 988430,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338710, "tid": 2379450, + "ts": 6345940490394.637, "dur": 3.649, + "args": { + "External id": 988431,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]", "[16384, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[67108864, 16384, 1], [], []], "Input Dims": [[8, 4096, 4096], [], []], "Ev Idx": 6414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940490401.889, "dur": 5.319, + "args": { + "External id": 988432,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940490403.793, "dur": 2.654, + "args": { + "External id": 988433,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940490405.624, "dur": 0.642, + "args": { + "External id": 988434,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2379450, + "ts": 6345940490414.218, "dur": 109.905, + "args": { + "External id": 988435,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16384, 1], [1, 14336], []], "Input Dims": [[32768, 4096], [14336, 4096], []], "Ev Idx": 6418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940490415.316, "dur": 5.375, + "args": { + "External id": 988436,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 14336]], "Input Dims": [[14336, 4096]], "Ev Idx": 6419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940490416.125, "dur": 3.994, + "args": { + "External id": 988437,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 14336], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 6420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940490417.513, "dur": 2.382, + "args": { + "External id": 988438,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[14336, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 14336], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 6421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2379450, + "ts": 6345940490422.020, "dur": 101.583, + "args": { + "External id": 988439,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 6422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940490423.826, "dur": 98.778, + "args": { + "External id": 988440,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 6423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2379450, + "ts": 6345940490528.508, "dur": 7.030, + "args": { + "External id": 988441,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [58720256, 14336, 1]], "Input Dims": [[32768, 14336], [8, 4096, 14336]], "Ev Idx": 6424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940490533.404, "dur": 1.974, + "args": { + "External id": 988442,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 6425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940490574.014, "dur": 7.658, + "args": { + "External id": 988443,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940490583.362, "dur": 1.920, + "args": { + "External id": 988444,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940490586.434, "dur": 2.591, + "args": { + "External id": 988445,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940490622.726, "dur": 2.858, + "args": { + "External id": 988446,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940490623.857, "dur": 1.531, + "args": { + "External id": 988447,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 2338710, "tid": 2379450, + "ts": 6345940490653.107, "dur": 141.691, + "args": { + "External id": 988448,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[16384, 1], [14336, 1]], []], "Input Dims": [[], [[32768, 4096], [32768, 14336]], []], "Ev Idx": 6431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2379450, + "ts": 6345940490661.582, "dur": 6.707, + "args": { + "External id": 988449,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940490666.523, "dur": 0.902, + "args": { + "External id": 988450,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096, 1]", "[16384, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 6433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338710, "tid": 2379450, + "ts": 6345940490670.207, "dur": 5.718, + "args": { + "External id": 988451,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16384, 1, 1], []], "Input Dims": [[32768, 4096, 1], []], "Ev Idx": 6434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940490673.991, "dur": 0.810, + "args": { + "External id": 988452,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 32768]", "[1, 1, 16384]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1, 1], [], [], []], "Input Dims": [[32768, 4096, 1], [], [], []], "Ev Idx": 6435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2379450, + "ts": 6345940490677.545, "dur": 3.475, + "args": { + "External id": 988453,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 6436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940490679.988, "dur": 0.653, + "args": { + "External id": 988454,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 6437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338710, "tid": 2379450, + "ts": 6345940490681.903, "dur": 3.635, + "args": { + "External id": 988455,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 6438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940490684.298, "dur": 0.507, + "args": { + "External id": 988456,"Record function id": 0, "Concrete Inputs": ["", "[1, 14336, 32768]", "[1, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1, 1], [], [], []], "Input Dims": [[32768, 14336, 1], [], [], []], "Ev Idx": 6439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338710, "tid": 2379450, + "ts": 6345940490690.505, "dur": 7.708, + "args": { + "External id": 988457,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 16384], []], "Input Dims": [[4096, 1, 32768], []], "Ev Idx": 6440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940490694.814, "dur": 2.979, + "args": { + "External id": 988458,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768, 1]", "[1, 16384, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 16384], [], [], []], "Input Dims": [[4096, 1, 32768], [], [], []], "Ev Idx": 6441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940490702.067, "dur": 6.942, + "args": { + "External id": 988459,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 16384, 1], []], "Input Dims": [[4096, 32768, 1], []], "Ev Idx": 6442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338710, "tid": 2379450, + "ts": 6345940490706.600, "dur": 2.223, + "args": { + "External id": 988460,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]", "[4096, 1, 16384]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 16384, 1], [], []], "Input Dims": [[4096, 32768, 1], [], []], "Ev Idx": 6443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338710, "tid": 2379450, + "ts": 6345940490710.671, "dur": 2.865, + "args": { + "External id": 988461,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 14336], []], "Input Dims": [[1, 14336, 32768], []], "Ev Idx": 6444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940490712.772, "dur": 0.375, + "args": { + "External id": 988462,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 14336], [], [], []], "Input Dims": [[1, 14336, 32768], [], [], []], "Ev Idx": 6445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940490714.836, "dur": 3.002, + "args": { + "External id": 988463,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 6446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940490716.054, "dur": 1.644, + "args": { + "External id": 988464,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 6447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345940490719.386, "dur": 61.198, + "args": { + "External id": 988465,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1, 16384], [469762048, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336]], "Ev Idx": 6448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940490782.896, "dur": 1.228, + "args": { + "External id": 988466,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[1, 4096, 14336], []], "Ev Idx": 6449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338710, "tid": 2379450, + "ts": 6345940490785.740, "dur": 4.252, + "args": { + "External id": 988467,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 14336, 1], []], "Input Dims": [[4096, 1, 14336], []], "Ev Idx": 6450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940490788.303, "dur": 0.899, + "args": { + "External id": 988468,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336, 1]", "[14336, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 14336, 1], [], [], []], "Input Dims": [[4096, 1, 14336], [], [], []], "Ev Idx": 6451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940490792.539, "dur": 1.056, + "args": { + "External id": 988469,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 14336], []], "Input Dims": [[4096, 14336, 1], []], "Ev Idx": 6452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940490842.449, "dur": 9.203, + "args": { + "External id": 988470,"Record function id": 0, "Ev Idx": 6453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940490844.652, "dur": 6.175, + "args": { + "External id": 988471,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940490846.421, "dur": 3.427, + "args": { + "External id": 988472,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940490847.482, "dur": 2.217, + "args": { + "External id": 988473,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940490856.242, "dur": 9.269, + "args": { + "External id": 988474,"Record function id": 0, "Sequence number": 10552626, "Fwd thread id": 1, "Ev Idx": 6457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940490857.724, "dur": 4.774, + "args": { + "External id": 988475,"Sequence number": 10552626, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 6458 + } + }, + { + "ph": "f", "id": 293, "pid": 2338710, "tid": 2379450, "ts": 6345940490857.724, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940490859.565, "dur": 2.735, + "args": { + "External id": 988476,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940490860.975, "dur": 1.116, + "args": { + "External id": 988477,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940490870.264, "dur": 122.157, + "args": { + "External id": 988478,"Record function id": 0, "Sequence number": 10552625, "Fwd thread id": 1, "Ev Idx": 6461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940490871.177, "dur": 113.918, + "args": { + "External id": 988479,"Sequence number": 10552625, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6462 + } + }, + { + "ph": "f", "id": 294, "pid": 2338710, "tid": 2379450, "ts": 6345940490871.177, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940490874.490, "dur": 7.225, + "args": { + "External id": 988480,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940490878.588, "dur": 2.471, + "args": { + "External id": 988481,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 6464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940490880.225, "dur": 0.667, + "args": { + "External id": 988482,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 6465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940490882.945, "dur": 46.555, + "args": { + "External id": 988483,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 6466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940490931.192, "dur": 4.958, + "args": { + "External id": 988484,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940490932.559, "dur": 2.861, + "args": { + "External id": 988485,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 6468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940490934.188, "dur": 1.011, + "args": { + "External id": 988486,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 6469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940490939.863, "dur": 5.797, + "args": { + "External id": 988487,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940490940.892, "dur": 4.194, + "args": { + "External id": 988488,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940490942.378, "dur": 2.601, + "args": { + "External id": 988489,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940490946.417, "dur": 37.685, + "args": { + "External id": 988490,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 6473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940490997.974, "dur": 7.216, + "args": { + "External id": 988491,"Record function id": 0, "Sequence number": 10552624, "Fwd thread id": 1, "Ev Idx": 6474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940490998.991, "dur": 4.648, + "args": { + "External id": 988492,"Sequence number": 10552624, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6475 + } + }, + { + "ph": "f", "id": 295, "pid": 2338710, "tid": 2379450, "ts": 6345940490998.991, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940491000.930, "dur": 2.509, + "args": { + "External id": 988493,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940491001.974, "dur": 1.312, + "args": { + "External id": 988494,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940491033.230, "dur": 13.572, + "args": { + "External id": 988495,"Record function id": 0, "Sequence number": 10552623, "Fwd thread id": 1, "Ev Idx": 6478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940491035.033, "dur": 9.295, + "args": { + "External id": 988496,"Sequence number": 10552623, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6479 + } + }, + { + "ph": "f", "id": 296, "pid": 2338710, "tid": 2379450, "ts": 6345940491035.033, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940491036.215, "dur": 7.812, + "args": { + "External id": 988497,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940491039.493, "dur": 3.670, + "args": { + "External id": 988498,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940491042.036, "dur": 0.877, + "args": { + "External id": 988499,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940491052.673, "dur": 59.247, + "args": { + "External id": 988500,"Record function id": 0, "Ev Idx": 6483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940491054.836, "dur": 55.818, + "args": { + "External id": 988501,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940491105.766, "dur": 4.437, + "args": { + "External id": 988502,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940491106.863, "dur": 3.030, + "args": { + "External id": 988503,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940491118.083, "dur": 9.270, + "args": { + "External id": 988504,"Record function id": 0, "Sequence number": 10552622, "Fwd thread id": 1, "Ev Idx": 6487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940491120.103, "dur": 4.604, + "args": { + "External id": 988505,"Sequence number": 10552622, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 6488 + } + }, + { + "ph": "f", "id": 297, "pid": 2338710, "tid": 2379450, "ts": 6345940491120.103, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940491121.534, "dur": 2.960, + "args": { + "External id": 988506,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940491123.066, "dur": 1.279, + "args": { + "External id": 988507,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940491131.366, "dur": 123.805, + "args": { + "External id": 988508,"Record function id": 0, "Sequence number": 10552621, "Fwd thread id": 1, "Ev Idx": 6491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940491135.077, "dur": 111.117, + "args": { + "External id": 988509,"Sequence number": 10552621, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6492 + } + }, + { + "ph": "f", "id": 298, "pid": 2338710, "tid": 2379450, "ts": 6345940491135.077, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940491137.968, "dur": 3.959, + "args": { + "External id": 988510,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940491138.961, "dur": 2.455, + "args": { + "External id": 988511,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 6494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940491140.326, "dur": 0.966, + "args": { + "External id": 988512,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 6495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940491142.920, "dur": 47.891, + "args": { + "External id": 988513,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 6496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940491192.363, "dur": 9.985, + "args": { + "External id": 988514,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940491193.633, "dur": 8.100, + "args": { + "External id": 988515,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 6498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940491197.317, "dur": 4.265, + "args": { + "External id": 988516,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 6499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940491203.660, "dur": 3.552, + "args": { + "External id": 988517,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940491204.803, "dur": 1.870, + "args": { + "External id": 988518,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940491205.948, "dur": 0.643, + "args": { + "External id": 988519,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940491208.193, "dur": 37.055, + "args": { + "External id": 988520,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 6503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940491260.542, "dur": 35.250, + "args": { + "External id": 988521,"Record function id": 0, "Sequence number": 10552620, "Fwd thread id": 1, "Ev Idx": 6504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940491261.765, "dur": 5.045, + "args": { + "External id": 988522,"Sequence number": 10552620, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6505 + } + }, + { + "ph": "f", "id": 299, "pid": 2338710, "tid": 2379450, "ts": 6345940491261.765, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940491263.901, "dur": 2.747, + "args": { + "External id": 988523,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940491264.941, "dur": 1.586, + "args": { + "External id": 988524,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338710, "tid": 2379450, + "ts": 6345940491270.063, "dur": 22.691, + "args": { + "External id": 988525,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940491300.541, "dur": 10.700, + "args": { + "External id": 988526,"Record function id": 0, "Sequence number": 10552619, "Fwd thread id": 1, "Ev Idx": 6509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940491301.793, "dur": 7.552, + "args": { + "External id": 988527,"Sequence number": 10552619, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6510 + } + }, + { + "ph": "f", "id": 300, "pid": 2338710, "tid": 2379450, "ts": 6345940491301.793, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940491304.917, "dur": 4.193, + "args": { + "External id": 988528,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940491305.804, "dur": 2.694, + "args": { + "External id": 988529,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940491307.613, "dur": 0.706, + "args": { + "External id": 988530,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940491316.235, "dur": 5.821, + "args": { + "External id": 988531,"Record function id": 0, "Ev Idx": 6514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940491318.150, "dur": 3.339, + "args": { + "External id": 988532,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940491319.434, "dur": 1.741, + "args": { + "External id": 988533,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940491320.024, "dur": 1.052, + "args": { + "External id": 988534,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940491326.861, "dur": 479.527, + "args": { + "External id": 988535,"Record function id": 0, "Sequence number": 10552618, "Fwd thread id": 1, "Ev Idx": 6518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940491328.458, "dur": 438.664, + "args": { + "External id": 988536,"Sequence number": 10552618, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 6519 + } + }, + { + "ph": "f", "id": 301, "pid": 2338710, "tid": 2379450, "ts": 6345940491328.458, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338710, "tid": 2379450, + "ts": 6345940491356.174, "dur": 38.020, + "args": { + "External id": 988537,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338710, "tid": 2379450, + "ts": 6345940491357.928, "dur": 36.046, + "args": { + "External id": 988538,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2379450, + "ts": 6345940491361.408, "dur": 7.590, + "args": { + "External id": 988539,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 6522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940491364.614, "dur": 3.546, + "args": { + "External id": 988540,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940491370.486, "dur": 22.855, + "args": { + "External id": 988541,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940491406.667, "dur": 7.827, + "args": { + "External id": 988542,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940491410.691, "dur": 3.649, + "args": { + "External id": 988543,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940491419.792, "dur": 1.975, + "args": { + "External id": 988544,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940491420.648, "dur": 1.021, + "args": { + "External id": 988545,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940491435.318, "dur": 3.130, + "args": { + "External id": 988546,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940491453.174, "dur": 2.324, + "args": { + "External id": 988547,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940491644.109, "dur": 4.570, + "args": { + "External id": 988548,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 6531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345940491653.831, "dur": 35.523, + "args": { + "External id": 988549,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 6532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940491665.392, "dur": 0.932, + "args": { + "External id": 988550,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 6533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345940491696.013, "dur": 33.935, + "args": { + "External id": 988551,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 6534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345940491698.220, "dur": 31.495, + "args": { + "External id": 988552,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 6535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940491703.352, "dur": 5.605, + "args": { + "External id": 988553,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940491713.062, "dur": 16.099, + "args": { + "External id": 988554,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 6537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2379450, + "ts": 6345940491735.541, "dur": 3.058, + "args": { + "External id": 988555,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 6538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940491736.980, "dur": 1.463, + "args": { + "External id": 988556,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 6539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940491746.807, "dur": 5.477, + "args": { + "External id": 988557,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940491747.896, "dur": 4.254, + "args": { + "External id": 988558,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940491755.142, "dur": 2.085, + "args": { + "External id": 988559,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940491756.356, "dur": 0.771, + "args": { + "External id": 988560,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345940491785.651, "dur": 18.958, + "args": { + "External id": 988561,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940491817.708, "dur": 8.276, + "args": { + "External id": 988562,"Record function id": 0, "Ev Idx": 6545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940491819.982, "dur": 5.332, + "args": { + "External id": 988563,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940491821.768, "dur": 2.541, + "args": { + "External id": 988564,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940491822.713, "dur": 1.495, + "args": { + "External id": 988565,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940491830.287, "dur": 8.248, + "args": { + "External id": 988566,"Record function id": 0, "Sequence number": 10552617, "Fwd thread id": 1, "Ev Idx": 6549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940491831.568, "dur": 3.958, + "args": { + "External id": 988567,"Sequence number": 10552617, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6550 + } + }, + { + "ph": "f", "id": 302, "pid": 2338710, "tid": 2379450, "ts": 6345940491831.568, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940491833.079, "dur": 2.232, + "args": { + "External id": 988568,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940491833.968, "dur": 1.222, + "args": { + "External id": 988569,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940491842.882, "dur": 148.394, + "args": { + "External id": 988570,"Record function id": 0, "Sequence number": 10552616, "Fwd thread id": 1, "Ev Idx": 6553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940491844.323, "dur": 139.404, + "args": { + "External id": 988571,"Sequence number": 10552616, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6554 + } + }, + { + "ph": "f", "id": 303, "pid": 2338710, "tid": 2379450, "ts": 6345940491844.323, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940491849.506, "dur": 5.045, + "args": { + "External id": 988572,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940491851.126, "dur": 2.806, + "args": { + "External id": 988573,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 6556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940491852.790, "dur": 0.990, + "args": { + "External id": 988574,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 6557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940491855.775, "dur": 68.864, + "args": { + "External id": 988575,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 6558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940491926.098, "dur": 7.012, + "args": { + "External id": 988576,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940491927.323, "dur": 5.047, + "args": { + "External id": 988577,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940491929.079, "dur": 3.074, + "args": { + "External id": 988578,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940491937.627, "dur": 4.177, + "args": { + "External id": 988579,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940491938.649, "dur": 2.631, + "args": { + "External id": 988580,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940491940.432, "dur": 0.750, + "args": { + "External id": 988581,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940491942.442, "dur": 40.367, + "args": { + "External id": 988582,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 6565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940491997.599, "dur": 8.800, + "args": { + "External id": 988583,"Record function id": 0, "Sequence number": 10552615, "Fwd thread id": 1, "Ev Idx": 6566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940491998.743, "dur": 5.016, + "args": { + "External id": 988584,"Sequence number": 10552615, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6567 + } + }, + { + "ph": "f", "id": 304, "pid": 2338710, "tid": 2379450, "ts": 6345940491998.743, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940492000.756, "dur": 2.848, + "args": { + "External id": 988585,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940492002.122, "dur": 1.367, + "args": { + "External id": 988586,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940492032.926, "dur": 13.401, + "args": { + "External id": 988587,"Record function id": 0, "Sequence number": 10552614, "Fwd thread id": 1, "Ev Idx": 6570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940492034.421, "dur": 8.691, + "args": { + "External id": 988588,"Sequence number": 10552614, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6571 + } + }, + { + "ph": "f", "id": 305, "pid": 2338710, "tid": 2379450, "ts": 6345940492034.421, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940492035.705, "dur": 7.117, + "args": { + "External id": 988589,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940492036.911, "dur": 5.118, + "args": { + "External id": 988590,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940492041.156, "dur": 0.644, + "args": { + "External id": 988591,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940492051.350, "dur": 59.260, + "args": { + "External id": 988592,"Record function id": 0, "Ev Idx": 6575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940492053.542, "dur": 55.427, + "args": { + "External id": 988593,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940492099.916, "dur": 8.486, + "args": { + "External id": 988594,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940492102.700, "dur": 5.383, + "args": { + "External id": 988595,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940492116.908, "dur": 9.331, + "args": { + "External id": 988596,"Record function id": 0, "Sequence number": 10552613, "Fwd thread id": 1, "Ev Idx": 6579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940492118.483, "dur": 5.373, + "args": { + "External id": 988597,"Sequence number": 10552613, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6580 + } + }, + { + "ph": "f", "id": 306, "pid": 2338710, "tid": 2379450, "ts": 6345940492118.483, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940492120.320, "dur": 3.352, + "args": { + "External id": 988598,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940492121.491, "dur": 2.003, + "args": { + "External id": 988599,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940492131.154, "dur": 405.031, + "args": { + "External id": 988600,"Record function id": 0, "Sequence number": 10552612, "Fwd thread id": 1, "Ev Idx": 6583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940492132.920, "dur": 384.077, + "args": { + "External id": 988601,"Sequence number": 10552612, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6584 + } + }, + { + "ph": "f", "id": 307, "pid": 2338710, "tid": 2379450, "ts": 6345940492132.920, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2379450, + "ts": 6345940492153.455, "dur": 9.398, + "args": { + "External id": 988602,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 6585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940492157.076, "dur": 5.269, + "args": { + "External id": 988603,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2379450, + "ts": 6345940492165.525, "dur": 4.074, + "args": { + "External id": 988604,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 6587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940492167.429, "dur": 1.942, + "args": { + "External id": 988605,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2379450, + "ts": 6345940492171.258, "dur": 4.073, + "args": { + "External id": 988606,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 6589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940492172.893, "dur": 2.235, + "args": { + "External id": 988607,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345940492205.363, "dur": 282.217, + "args": { + "External id": 988608,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 6591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940492300.920, "dur": 4.810, + "args": { + "External id": 988609,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940492308.248, "dur": 2.819, + "args": { + "External id": 988610,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940492312.733, "dur": 2.908, + "args": { + "External id": 988611,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940492316.886, "dur": 4.560, + "args": { + "External id": 988612,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940492373.630, "dur": 3.833, + "args": { + "External id": 988613,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940492375.974, "dur": 1.370, + "args": { + "External id": 988614,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345940492381.909, "dur": 32.777, + "args": { + "External id": 988615,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 6598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940492390.713, "dur": 1.149, + "args": { + "External id": 988616,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 6599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940492416.940, "dur": 2.353, + "args": { + "External id": 988617,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940492418.471, "dur": 0.739, + "args": { + "External id": 988618,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345940492420.813, "dur": 16.438, + "args": { + "External id": 988619,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 6602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940492423.646, "dur": 0.544, + "args": { + "External id": 988620,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 6603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338710, "tid": 2379450, + "ts": 6345940492502.784, "dur": 3.905, + "args": { + "External id": 988621,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338710, "tid": 2379450, + "ts": 6345940492510.056, "dur": 0.688, + "args": { + "External id": 988622,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338710, "tid": 2379450, + "ts": 6345940492513.046, "dur": 0.864, + "args": { + "External id": 988623,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940492544.768, "dur": 254.175, + "args": { + "External id": 988624,"Record function id": 0, "Sequence number": 10552611, "Fwd thread id": 1, "Ev Idx": 6607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940492546.751, "dur": 244.546, + "args": { + "External id": 988625,"Sequence number": 10552611, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6608 + } + }, + { + "ph": "f", "id": 308, "pid": 2338710, "tid": 2379450, "ts": 6345940492546.751, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338710, "tid": 2379450, + "ts": 6345940492569.806, "dur": 49.131, + "args": { + "External id": 988626,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940492573.179, "dur": 3.747, + "args": { + "External id": 988627,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940492578.621, "dur": 39.318, + "args": { + "External id": 988628,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], [8, 4096, 8, 128], []], "Ev Idx": 6611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2379450, + "ts": 6345940492630.877, "dur": 5.346, + "args": { + "External id": 988629,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 6612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940492633.208, "dur": 2.656, + "args": { + "External id": 988630,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940492807.426, "dur": 189.567, + "args": { + "External id": 988631,"Record function id": 0, "Sequence number": 10552610, "Fwd thread id": 1, "Ev Idx": 6614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940492809.519, "dur": 180.367, + "args": { + "External id": 988632,"Sequence number": 10552610, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6615 + } + }, + { + "ph": "f", "id": 309, "pid": 2338710, "tid": 2379450, "ts": 6345940492809.519, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338710, "tid": 2379450, + "ts": 6345940492822.575, "dur": 52.112, + "args": { + "External id": 988633,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940492825.864, "dur": 3.586, + "args": { + "External id": 988634,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940492833.708, "dur": 40.417, + "args": { + "External id": 988635,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], []], "Ev Idx": 6618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2379450, + "ts": 6345940492883.430, "dur": 5.675, + "args": { + "External id": 988636,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 6619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940492885.972, "dur": 2.774, + "args": { + "External id": 988637,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940493004.867, "dur": 41.527, + "args": { + "External id": 988638,"Record function id": 0, "Sequence number": 10552609, "Fwd thread id": 1, "Ev Idx": 6621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940493006.392, "dur": 35.065, + "args": { + "External id": 988639,"Sequence number": 10552609, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6622 + } + }, + { + "ph": "f", "id": 310, "pid": 2338710, "tid": 2379450, "ts": 6345940493006.392, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940493033.052, "dur": 8.134, + "args": { + "External id": 988640,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940493034.967, "dur": 5.838, + "args": { + "External id": 988641,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940493053.165, "dur": 58.613, + "args": { + "External id": 988642,"Record function id": 0, "Sequence number": 10552608, "Fwd thread id": 1, "Ev Idx": 6625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940493054.344, "dur": 54.113, + "args": { + "External id": 988643,"Sequence number": 10552608, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6626 + } + }, + { + "ph": "f", "id": 311, "pid": 2338710, "tid": 2379450, "ts": 6345940493054.344, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940493103.407, "dur": 4.798, + "args": { + "External id": 988644,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940493104.566, "dur": 3.338, + "args": { + "External id": 988645,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940493118.079, "dur": 11.408, + "args": { + "External id": 988646,"Record function id": 0, "Sequence number": 10552607, "Fwd thread id": 1, "Ev Idx": 6629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940493119.440, "dur": 7.792, + "args": { + "External id": 988647,"Sequence number": 10552607, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6630 + } + }, + { + "ph": "f", "id": 312, "pid": 2338710, "tid": 2379450, "ts": 6345940493119.440, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940493121.219, "dur": 5.802, + "args": { + "External id": 988648,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940493125.444, "dur": 1.430, + "args": { + "External id": 988649,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940493134.008, "dur": 7.532, + "args": { + "External id": 988650,"Record function id": 0, "Sequence number": 10552606, "Fwd thread id": 1, "Ev Idx": 6633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940493135.221, "dur": 3.978, + "args": { + "External id": 988651,"Sequence number": 10552606, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 6634 + } + }, + { + "ph": "f", "id": 313, "pid": 2338710, "tid": 2379450, "ts": 6345940493135.221, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940493136.619, "dur": 2.347, + "args": { + "External id": 988652,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940493137.555, "dur": 1.296, + "args": { + "External id": 988653,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940493145.821, "dur": 205.023, + "args": { + "External id": 988654,"Record function id": 0, "Sequence number": 10552605, "Fwd thread id": 1, "Ev Idx": 6637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940493146.664, "dur": 194.189, + "args": { + "External id": 988655,"Sequence number": 10552605, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6638 + } + }, + { + "ph": "f", "id": 314, "pid": 2338710, "tid": 2379450, "ts": 6345940493146.664, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940493151.044, "dur": 13.715, + "args": { + "External id": 988656,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940493156.274, "dur": 7.622, + "args": { + "External id": 988657,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 6640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940493159.099, "dur": 4.480, + "args": { + "External id": 988658,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 6641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940493166.759, "dur": 91.872, + "args": { + "External id": 988659,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 6642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940493260.948, "dur": 11.897, + "args": { + "External id": 988660,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940493261.842, "dur": 10.015, + "args": { + "External id": 988661,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 6644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940493268.573, "dur": 3.026, + "args": { + "External id": 988662,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 6645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940493277.130, "dur": 3.891, + "args": { + "External id": 988663,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940493278.314, "dur": 2.141, + "args": { + "External id": 988664,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940493279.646, "dur": 0.697, + "args": { + "External id": 988665,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940493281.842, "dur": 57.848, + "args": { + "External id": 988666,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 6649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940493356.984, "dur": 8.261, + "args": { + "External id": 988667,"Record function id": 0, "Sequence number": 10552604, "Fwd thread id": 1, "Ev Idx": 6650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940493358.050, "dur": 5.036, + "args": { + "External id": 988668,"Sequence number": 10552604, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6651 + } + }, + { + "ph": "f", "id": 315, "pid": 2338710, "tid": 2379450, "ts": 6345940493358.050, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940493360.023, "dur": 2.894, + "args": { + "External id": 988669,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940493361.291, "dur": 1.450, + "args": { + "External id": 988670,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940493369.789, "dur": 42.339, + "args": { + "External id": 988671,"Record function id": 0, "Sequence number": 10552603, "Fwd thread id": 1, "Ev Idx": 6654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940493370.829, "dur": 38.881, + "args": { + "External id": 988672,"Sequence number": 10552603, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6655 + } + }, + { + "ph": "f", "id": 316, "pid": 2338710, "tid": 2379450, "ts": 6345940493370.829, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940493405.655, "dur": 3.823, + "args": { + "External id": 988673,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940493406.564, "dur": 2.362, + "args": { + "External id": 988674,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940493408.285, "dur": 0.492, + "args": { + "External id": 988675,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940493419.446, "dur": 13.419, + "args": { + "External id": 988676,"Record function id": 0, "Ev Idx": 6659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940493421.149, "dur": 10.722, + "args": { + "External id": 988677,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940493424.116, "dur": 7.290, + "args": { + "External id": 988678,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940493425.347, "dur": 5.940, + "args": { + "External id": 988679,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940493436.976, "dur": 6.684, + "args": { + "External id": 988680,"Record function id": 0, "Sequence number": 10552602, "Fwd thread id": 1, "Ev Idx": 6663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940493438.254, "dur": 3.306, + "args": { + "External id": 988681,"Sequence number": 10552602, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 6664 + } + }, + { + "ph": "f", "id": 317, "pid": 2338710, "tid": 2379450, "ts": 6345940493438.254, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940493439.421, "dur": 1.928, + "args": { + "External id": 988682,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940493440.234, "dur": 1.010, + "args": { + "External id": 988683,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940493447.727, "dur": 105.925, + "args": { + "External id": 988684,"Record function id": 0, "Sequence number": 10552601, "Fwd thread id": 1, "Ev Idx": 6667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940493451.143, "dur": 93.626, + "args": { + "External id": 988685,"Sequence number": 10552601, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6668 + } + }, + { + "ph": "f", "id": 318, "pid": 2338710, "tid": 2379450, "ts": 6345940493451.143, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940493453.273, "dur": 3.391, + "args": { + "External id": 988686,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940493453.904, "dur": 2.254, + "args": { + "External id": 988687,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 6670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940493455.626, "dur": 0.363, + "args": { + "External id": 988688,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 6671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940493457.445, "dur": 32.382, + "args": { + "External id": 988689,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 6672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940493491.341, "dur": 6.079, + "args": { + "External id": 988690,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940493491.935, "dur": 4.724, + "args": { + "External id": 988691,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 6674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940493495.800, "dur": 0.715, + "args": { + "External id": 988692,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 6675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940493498.694, "dur": 3.743, + "args": { + "External id": 988693,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940493499.664, "dur": 2.285, + "args": { + "External id": 988694,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940493501.520, "dur": 0.303, + "args": { + "External id": 988695,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940493502.956, "dur": 40.564, + "args": { + "External id": 988696,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 6679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940493559.938, "dur": 40.550, + "args": { + "External id": 988697,"Record function id": 0, "Sequence number": 10552600, "Fwd thread id": 1, "Ev Idx": 6680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940493560.924, "dur": 6.314, + "args": { + "External id": 988698,"Sequence number": 10552600, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6681 + } + }, + { + "ph": "f", "id": 319, "pid": 2338710, "tid": 2379450, "ts": 6345940493560.924, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940493562.500, "dur": 4.529, + "args": { + "External id": 988699,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940493563.336, "dur": 3.498, + "args": { + "External id": 988700,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338710, "tid": 2379450, + "ts": 6345940493570.942, "dur": 26.433, + "args": { + "External id": 988701,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940493604.985, "dur": 10.670, + "args": { + "External id": 988702,"Record function id": 0, "Sequence number": 10552599, "Fwd thread id": 1, "Ev Idx": 6685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940493608.062, "dur": 5.211, + "args": { + "External id": 988703,"Sequence number": 10552599, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6686 + } + }, + { + "ph": "f", "id": 320, "pid": 2338710, "tid": 2379450, "ts": 6345940493608.062, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940493608.595, "dur": 4.434, + "args": { + "External id": 988704,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940493609.594, "dur": 2.672, + "args": { + "External id": 988705,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940493611.546, "dur": 0.573, + "args": { + "External id": 988706,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940493620.315, "dur": 5.642, + "args": { + "External id": 988707,"Record function id": 0, "Ev Idx": 6690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940493622.156, "dur": 3.278, + "args": { + "External id": 988708,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940493623.226, "dur": 1.815, + "args": { + "External id": 988709,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940493623.780, "dur": 1.138, + "args": { + "External id": 988710,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940493630.123, "dur": 9.711, + "args": { + "External id": 988711,"Record function id": 0, "Sequence number": 10552598, "Fwd thread id": 1, "Ev Idx": 6694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940493631.570, "dur": 5.917, + "args": { + "External id": 988712,"Sequence number": 10552598, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6695 + } + }, + { + "ph": "f", "id": 321, "pid": 2338710, "tid": 2379450, "ts": 6345940493631.570, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940493632.624, "dur": 4.706, + "args": { + "External id": 988713,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940493636.314, "dur": 0.851, + "args": { + "External id": 988714,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940493643.831, "dur": 113.165, + "args": { + "External id": 988715,"Record function id": 0, "Sequence number": 10552597, "Fwd thread id": 1, "Ev Idx": 6698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940493644.574, "dur": 103.182, + "args": { + "External id": 988716,"Sequence number": 10552597, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6699 + } + }, + { + "ph": "f", "id": 322, "pid": 2338710, "tid": 2379450, "ts": 6345940493644.574, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940493646.994, "dur": 2.991, + "args": { + "External id": 988717,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940493647.694, "dur": 1.713, + "args": { + "External id": 988718,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 6701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940493648.837, "dur": 0.415, + "args": { + "External id": 988719,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 6702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940493650.980, "dur": 41.087, + "args": { + "External id": 988720,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 6703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940493696.423, "dur": 4.118, + "args": { + "External id": 988721,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940493697.230, "dur": 2.486, + "args": { + "External id": 988722,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940493698.821, "dur": 0.743, + "args": { + "External id": 988723,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940493701.749, "dur": 4.115, + "args": { + "External id": 988724,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940493702.842, "dur": 2.479, + "args": { + "External id": 988725,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940493704.649, "dur": 0.524, + "args": { + "External id": 988726,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940493708.479, "dur": 38.469, + "args": { + "External id": 988727,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 6710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940493763.057, "dur": 27.219, + "args": { + "External id": 988728,"Record function id": 0, "Sequence number": 10552596, "Fwd thread id": 1, "Ev Idx": 6711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940493763.948, "dur": 4.363, + "args": { + "External id": 988729,"Sequence number": 10552596, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6712 + } + }, + { + "ph": "f", "id": 323, "pid": 2338710, "tid": 2379450, "ts": 6345940493763.948, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940493765.769, "dur": 2.356, + "args": { + "External id": 988730,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940493766.659, "dur": 1.329, + "args": { + "External id": 988731,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345940493771.083, "dur": 16.294, + "args": { + "External id": 988732,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940493794.645, "dur": 8.535, + "args": { + "External id": 988733,"Record function id": 0, "Sequence number": 10552595, "Fwd thread id": 1, "Ev Idx": 6716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940493795.597, "dur": 4.991, + "args": { + "External id": 988734,"Sequence number": 10552595, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6717 + } + }, + { + "ph": "f", "id": 324, "pid": 2338710, "tid": 2379450, "ts": 6345940493795.597, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940493796.304, "dur": 4.050, + "args": { + "External id": 988735,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940493797.191, "dur": 2.467, + "args": { + "External id": 988736,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940493799.061, "dur": 0.448, + "args": { + "External id": 988737,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940493808.182, "dur": 5.813, + "args": { + "External id": 988738,"Record function id": 0, "Ev Idx": 6721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940493809.723, "dur": 3.703, + "args": { + "External id": 988739,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940493810.673, "dur": 1.963, + "args": { + "External id": 988740,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940493811.154, "dur": 1.370, + "args": { + "External id": 988741,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940493819.089, "dur": 516.364, + "args": { + "External id": 988742,"Record function id": 0, "Sequence number": 10552594, "Fwd thread id": 1, "Ev Idx": 6725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940493820.587, "dur": 473.612, + "args": { + "External id": 988743,"Sequence number": 10552594, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6726 + } + }, + { + "ph": "f", "id": 325, "pid": 2338710, "tid": 2379450, "ts": 6345940493820.587, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940493861.144, "dur": 4.180, + "args": { + "External id": 988744,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940493864.059, "dur": 1.147, + "args": { + "External id": 988745,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940493882.680, "dur": 4.475, + "args": { + "External id": 988746,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940493897.954, "dur": 2.275, + "args": { + "External id": 988747,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940494158.357, "dur": 5.639, + "args": { + "External id": 988748,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 6731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345940494169.376, "dur": 48.479, + "args": { + "External id": 988749,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 6732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940494186.964, "dur": 1.236, + "args": { + "External id": 988750,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 6733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345940494225.006, "dur": 41.195, + "args": { + "External id": 988751,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 6734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345940494227.292, "dur": 38.662, + "args": { + "External id": 988752,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 6735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940494235.294, "dur": 5.294, + "args": { + "External id": 988753,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940494242.533, "dur": 22.763, + "args": { + "External id": 988754,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 6737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2379450, + "ts": 6345940494271.344, "dur": 3.217, + "args": { + "External id": 988755,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 6738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940494273.009, "dur": 1.431, + "args": { + "External id": 988756,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 6739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940494282.929, "dur": 2.846, + "args": { + "External id": 988757,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940494284.509, "dur": 1.130, + "args": { + "External id": 988758,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345940494309.572, "dur": 20.865, + "args": { + "External id": 988759,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940494351.174, "dur": 10.618, + "args": { + "External id": 988760,"Record function id": 0, "Ev Idx": 6743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940494353.915, "dur": 7.046, + "args": { + "External id": 988761,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940494356.520, "dur": 3.144, + "args": { + "External id": 988762,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940494357.719, "dur": 1.830, + "args": { + "External id": 988763,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940494366.469, "dur": 7.635, + "args": { + "External id": 988764,"Record function id": 0, "Sequence number": 10552593, "Fwd thread id": 1, "Ev Idx": 6747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940494367.916, "dur": 1.763, + "args": { + "External id": 988765,"Sequence number": 10552593, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6748 + } + }, + { + "ph": "f", "id": 326, "pid": 2338710, "tid": 2379450, "ts": 6345940494367.916, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940494378.694, "dur": 486.095, + "args": { + "External id": 988766,"Record function id": 0, "Sequence number": 10552592, "Fwd thread id": 1, "Ev Idx": 6749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940494383.132, "dur": 468.646, + "args": { + "External id": 988767,"Sequence number": 10552592, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6750 + } + }, + { + "ph": "f", "id": 327, "pid": 2338710, "tid": 2379450, "ts": 6345940494383.132, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940494421.112, "dur": 9.876, + "args": { + "External id": 988768,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338710, "tid": 2379450, + "ts": 6345940494427.036, "dur": 3.605, + "args": { + "External id": 988769,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]", "[16384, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[67108864, 16384, 1], [], []], "Input Dims": [[8, 4096, 4096], [], []], "Ev Idx": 6752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940494434.543, "dur": 6.303, + "args": { + "External id": 988770,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940494436.918, "dur": 3.105, + "args": { + "External id": 988771,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940494439.294, "dur": 0.502, + "args": { + "External id": 988772,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2379450, + "ts": 6345940494445.290, "dur": 115.743, + "args": { + "External id": 988773,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16384, 1], [1, 14336], []], "Input Dims": [[32768, 4096], [14336, 4096], []], "Ev Idx": 6756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940494448.551, "dur": 6.166, + "args": { + "External id": 988774,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 14336]], "Input Dims": [[14336, 4096]], "Ev Idx": 6757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940494449.316, "dur": 4.817, + "args": { + "External id": 988775,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 14336], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 6758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940494450.883, "dur": 3.060, + "args": { + "External id": 988776,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[14336, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 14336], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 6759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2379450, + "ts": 6345940494456.039, "dur": 104.489, + "args": { + "External id": 988777,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 6760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940494458.037, "dur": 101.560, + "args": { + "External id": 988778,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 6761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2379450, + "ts": 6345940494565.539, "dur": 3.641, + "args": { + "External id": 988779,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [58720256, 14336, 1]], "Input Dims": [[32768, 14336], [8, 4096, 14336]], "Ev Idx": 6762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940494567.254, "dur": 1.751, + "args": { + "External id": 988780,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 6763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940494607.046, "dur": 5.994, + "args": { + "External id": 988781,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940494614.812, "dur": 5.076, + "args": { + "External id": 988782,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940494621.150, "dur": 2.680, + "args": { + "External id": 988783,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940494659.490, "dur": 2.934, + "args": { + "External id": 988784,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940494660.700, "dur": 1.550, + "args": { + "External id": 988785,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 2338710, "tid": 2379450, + "ts": 6345940494690.793, "dur": 138.857, + "args": { + "External id": 988786,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[16384, 1], [14336, 1]], []], "Input Dims": [[], [[32768, 4096], [32768, 14336]], []], "Ev Idx": 6769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2379450, + "ts": 6345940494700.115, "dur": 6.796, + "args": { + "External id": 988787,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940494704.882, "dur": 1.092, + "args": { + "External id": 988788,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096, 1]", "[16384, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 6771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338710, "tid": 2379450, + "ts": 6345940494708.684, "dur": 5.180, + "args": { + "External id": 988789,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16384, 1, 1], []], "Input Dims": [[32768, 4096, 1], []], "Ev Idx": 6772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940494712.489, "dur": 0.482, + "args": { + "External id": 988790,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 32768]", "[1, 1, 16384]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1, 1], [], [], []], "Input Dims": [[32768, 4096, 1], [], [], []], "Ev Idx": 6773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2379450, + "ts": 6345940494715.180, "dur": 3.020, + "args": { + "External id": 988791,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 6774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940494717.084, "dur": 0.699, + "args": { + "External id": 988792,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 6775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338710, "tid": 2379450, + "ts": 6345940494719.094, "dur": 3.800, + "args": { + "External id": 988793,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 6776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940494721.484, "dur": 0.781, + "args": { + "External id": 988794,"Record function id": 0, "Concrete Inputs": ["", "[1, 14336, 32768]", "[1, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1, 1], [], [], []], "Input Dims": [[32768, 14336, 1], [], [], []], "Ev Idx": 6777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338710, "tid": 2379450, + "ts": 6345940494727.603, "dur": 5.273, + "args": { + "External id": 988795,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 16384], []], "Input Dims": [[4096, 1, 32768], []], "Ev Idx": 6778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940494732.060, "dur": 0.456, + "args": { + "External id": 988796,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768, 1]", "[1, 16384, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 16384], [], [], []], "Input Dims": [[4096, 1, 32768], [], [], []], "Ev Idx": 6779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940494734.066, "dur": 8.013, + "args": { + "External id": 988797,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 16384, 1], []], "Input Dims": [[4096, 32768, 1], []], "Ev Idx": 6780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338710, "tid": 2379450, + "ts": 6345940494737.904, "dur": 3.987, + "args": { + "External id": 988798,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]", "[4096, 1, 16384]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 16384, 1], [], []], "Input Dims": [[4096, 32768, 1], [], []], "Ev Idx": 6781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338710, "tid": 2379450, + "ts": 6345940494745.520, "dur": 2.766, + "args": { + "External id": 988799,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 14336], []], "Input Dims": [[1, 14336, 32768], []], "Ev Idx": 6782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940494747.234, "dur": 0.648, + "args": { + "External id": 988800,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 14336], [], [], []], "Input Dims": [[1, 14336, 32768], [], [], []], "Ev Idx": 6783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940494749.360, "dur": 3.119, + "args": { + "External id": 988801,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 6784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940494750.363, "dur": 2.023, + "args": { + "External id": 988802,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 6785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345940494753.946, "dur": 62.027, + "args": { + "External id": 988803,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1, 16384], [469762048, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336]], "Ev Idx": 6786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940494818.470, "dur": 1.161, + "args": { + "External id": 988804,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[1, 4096, 14336], []], "Ev Idx": 6787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338710, "tid": 2379450, + "ts": 6345940494820.611, "dur": 4.347, + "args": { + "External id": 988805,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 14336, 1], []], "Input Dims": [[4096, 1, 14336], []], "Ev Idx": 6788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940494823.376, "dur": 0.628, + "args": { + "External id": 988806,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336, 1]", "[14336, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 14336, 1], [], [], []], "Input Dims": [[4096, 1, 14336], [], [], []], "Ev Idx": 6789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940494827.350, "dur": 1.120, + "args": { + "External id": 988807,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 14336], []], "Input Dims": [[4096, 14336, 1], []], "Ev Idx": 6790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940494876.278, "dur": 9.972, + "args": { + "External id": 988808,"Record function id": 0, "Ev Idx": 6791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940494878.321, "dur": 6.902, + "args": { + "External id": 988809,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940494880.306, "dur": 3.544, + "args": { + "External id": 988810,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940494881.546, "dur": 2.184, + "args": { + "External id": 988811,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940494890.836, "dur": 7.917, + "args": { + "External id": 988812,"Record function id": 0, "Sequence number": 10552591, "Fwd thread id": 1, "Ev Idx": 6795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940494892.411, "dur": 4.191, + "args": { + "External id": 988813,"Sequence number": 10552591, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 6796 + } + }, + { + "ph": "f", "id": 328, "pid": 2338710, "tid": 2379450, "ts": 6345940494892.411, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940494894.178, "dur": 2.259, + "args": { + "External id": 988814,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940494895.193, "dur": 1.105, + "args": { + "External id": 988815,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940494903.180, "dur": 207.772, + "args": { + "External id": 988816,"Record function id": 0, "Sequence number": 10552590, "Fwd thread id": 1, "Ev Idx": 6799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940494904.160, "dur": 142.812, + "args": { + "External id": 988817,"Sequence number": 10552590, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6800 + } + }, + { + "ph": "f", "id": 329, "pid": 2338710, "tid": 2379450, "ts": 6345940494904.160, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940494907.890, "dur": 9.068, + "args": { + "External id": 988818,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940494909.220, "dur": 7.135, + "args": { + "External id": 988819,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 6802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940494913.024, "dur": 3.126, + "args": { + "External id": 988820,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 6803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940494918.169, "dur": 47.602, + "args": { + "External id": 988821,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 6804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940494967.279, "dur": 4.347, + "args": { + "External id": 988822,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940494968.065, "dur": 2.925, + "args": { + "External id": 988823,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 6806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940494969.661, "dur": 1.112, + "args": { + "External id": 988824,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 6807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940494973.140, "dur": 6.805, + "args": { + "External id": 988825,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940494977.243, "dur": 2.208, + "args": { + "External id": 988826,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940494978.670, "dur": 0.706, + "args": { + "External id": 988827,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940494980.894, "dur": 64.451, + "args": { + "External id": 988828,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 6811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940495122.738, "dur": 10.335, + "args": { + "External id": 988829,"Record function id": 0, "Sequence number": 10552589, "Fwd thread id": 1, "Ev Idx": 6812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940495124.266, "dur": 6.876, + "args": { + "External id": 988830,"Sequence number": 10552589, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6813 + } + }, + { + "ph": "f", "id": 330, "pid": 2338710, "tid": 2379450, "ts": 6345940495124.266, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940495126.712, "dur": 4.214, + "args": { + "External id": 988831,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940495128.289, "dur": 2.508, + "args": { + "External id": 988832,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940495136.999, "dur": 11.577, + "args": { + "External id": 988833,"Record function id": 0, "Sequence number": 10552588, "Fwd thread id": 1, "Ev Idx": 6816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940495138.096, "dur": 7.547, + "args": { + "External id": 988834,"Sequence number": 10552588, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6817 + } + }, + { + "ph": "f", "id": 331, "pid": 2338710, "tid": 2379450, "ts": 6345940495138.096, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940495139.149, "dur": 6.230, + "args": { + "External id": 988835,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940495142.512, "dur": 2.182, + "args": { + "External id": 988836,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940495143.970, "dur": 0.580, + "args": { + "External id": 988837,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940495153.658, "dur": 7.801, + "args": { + "External id": 988838,"Record function id": 0, "Ev Idx": 6821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940495155.612, "dur": 5.167, + "args": { + "External id": 988839,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940495157.512, "dur": 2.918, + "args": { + "External id": 988840,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940495158.608, "dur": 1.660, + "args": { + "External id": 988841,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940495165.510, "dur": 7.124, + "args": { + "External id": 988842,"Record function id": 0, "Sequence number": 10552587, "Fwd thread id": 1, "Ev Idx": 6825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940495167.021, "dur": 3.596, + "args": { + "External id": 988843,"Sequence number": 10552587, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 6826 + } + }, + { + "ph": "f", "id": 332, "pid": 2338710, "tid": 2379450, "ts": 6345940495167.021, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940495168.247, "dur": 2.202, + "args": { + "External id": 988844,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940495169.249, "dur": 1.051, + "args": { + "External id": 988845,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940495176.847, "dur": 122.435, + "args": { + "External id": 988846,"Record function id": 0, "Sequence number": 10552586, "Fwd thread id": 1, "Ev Idx": 6829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940495180.466, "dur": 109.030, + "args": { + "External id": 988847,"Sequence number": 10552586, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6830 + } + }, + { + "ph": "f", "id": 333, "pid": 2338710, "tid": 2379450, "ts": 6345940495180.466, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940495182.935, "dur": 5.517, + "args": { + "External id": 988848,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940495183.529, "dur": 4.423, + "args": { + "External id": 988849,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 6832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940495184.800, "dur": 3.019, + "args": { + "External id": 988850,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 6833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940495189.381, "dur": 49.095, + "args": { + "External id": 988851,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 6834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940495240.086, "dur": 7.079, + "args": { + "External id": 988852,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940495240.937, "dur": 5.638, + "args": { + "External id": 988853,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 6836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940495244.819, "dur": 1.611, + "args": { + "External id": 988854,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 6837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940495248.268, "dur": 4.252, + "args": { + "External id": 988855,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940495249.432, "dur": 2.350, + "args": { + "External id": 988856,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940495251.196, "dur": 0.507, + "args": { + "External id": 988857,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940495253.325, "dur": 35.184, + "args": { + "External id": 988858,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 6841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940495304.767, "dur": 37.380, + "args": { + "External id": 988859,"Record function id": 0, "Sequence number": 10552585, "Fwd thread id": 1, "Ev Idx": 6842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940495305.864, "dur": 4.868, + "args": { + "External id": 988860,"Sequence number": 10552585, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6843 + } + }, + { + "ph": "f", "id": 334, "pid": 2338710, "tid": 2379450, "ts": 6345940495305.864, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940495308.058, "dur": 2.502, + "args": { + "External id": 988861,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940495309.281, "dur": 1.171, + "args": { + "External id": 988862,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338710, "tid": 2379450, + "ts": 6345940495314.300, "dur": 24.243, + "args": { + "External id": 988863,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940495346.902, "dur": 10.688, + "args": { + "External id": 988864,"Record function id": 0, "Sequence number": 10552584, "Fwd thread id": 1, "Ev Idx": 6847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940495350.126, "dur": 5.349, + "args": { + "External id": 988865,"Sequence number": 10552584, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6848 + } + }, + { + "ph": "f", "id": 335, "pid": 2338710, "tid": 2379450, "ts": 6345940495350.126, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940495351.096, "dur": 4.129, + "args": { + "External id": 988866,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940495352.216, "dur": 2.350, + "args": { + "External id": 988867,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940495353.970, "dur": 0.401, + "args": { + "External id": 988868,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940495362.079, "dur": 8.111, + "args": { + "External id": 988869,"Record function id": 0, "Ev Idx": 6852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940495363.911, "dur": 5.708, + "args": { + "External id": 988870,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940495364.846, "dur": 4.288, + "args": { + "External id": 988871,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940495365.400, "dur": 3.616, + "args": { + "External id": 988872,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940495375.009, "dur": 481.523, + "args": { + "External id": 988873,"Record function id": 0, "Sequence number": 10552583, "Fwd thread id": 1, "Ev Idx": 6856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940495376.921, "dur": 443.207, + "args": { + "External id": 988874,"Sequence number": 10552583, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 6857 + } + }, + { + "ph": "f", "id": 336, "pid": 2338710, "tid": 2379450, "ts": 6345940495376.921, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338710, "tid": 2379450, + "ts": 6345940495404.150, "dur": 43.743, + "args": { + "External id": 988875,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338710, "tid": 2379450, + "ts": 6345940495406.213, "dur": 41.446, + "args": { + "External id": 988876,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2379450, + "ts": 6345940495409.513, "dur": 12.555, + "args": { + "External id": 988877,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 6860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940495417.888, "dur": 3.583, + "args": { + "External id": 988878,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940495423.903, "dur": 23.180, + "args": { + "External id": 988879,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940495464.605, "dur": 2.859, + "args": { + "External id": 988880,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940495465.997, "dur": 1.303, + "args": { + "External id": 988881,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940495471.570, "dur": 2.431, + "args": { + "External id": 988882,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940495473.033, "dur": 0.867, + "args": { + "External id": 988883,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940495490.613, "dur": 2.857, + "args": { + "External id": 988884,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940495506.584, "dur": 2.664, + "args": { + "External id": 988885,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940495693.978, "dur": 4.642, + "args": { + "External id": 988886,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 6869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345940495703.892, "dur": 39.659, + "args": { + "External id": 988887,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 6870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940495714.536, "dur": 3.264, + "args": { + "External id": 988888,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 6871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345940495749.911, "dur": 34.503, + "args": { + "External id": 988889,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 6872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345940495752.014, "dur": 32.156, + "args": { + "External id": 988890,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 6873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940495757.083, "dur": 5.131, + "args": { + "External id": 988891,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940495766.221, "dur": 17.352, + "args": { + "External id": 988892,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 6875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2379450, + "ts": 6345940495789.231, "dur": 2.952, + "args": { + "External id": 988893,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 6876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940495790.876, "dur": 1.157, + "args": { + "External id": 988894,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 6877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940495800.014, "dur": 2.691, + "args": { + "External id": 988895,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940495801.193, "dur": 1.394, + "args": { + "External id": 988896,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940495805.351, "dur": 2.260, + "args": { + "External id": 988897,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940495806.363, "dur": 1.151, + "args": { + "External id": 988898,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345940495835.395, "dur": 19.546, + "args": { + "External id": 988899,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940495867.824, "dur": 8.432, + "args": { + "External id": 988900,"Record function id": 0, "Ev Idx": 6883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940495870.081, "dur": 5.332, + "args": { + "External id": 988901,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940495871.898, "dur": 2.596, + "args": { + "External id": 988902,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940495872.936, "dur": 1.445, + "args": { + "External id": 988903,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940495880.349, "dur": 8.524, + "args": { + "External id": 988904,"Record function id": 0, "Sequence number": 10552582, "Fwd thread id": 1, "Ev Idx": 6887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940495881.692, "dur": 4.524, + "args": { + "External id": 988905,"Sequence number": 10552582, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6888 + } + }, + { + "ph": "f", "id": 337, "pid": 2338710, "tid": 2379450, "ts": 6345940495881.692, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940495883.485, "dur": 2.561, + "args": { + "External id": 988906,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940495884.461, "dur": 1.413, + "args": { + "External id": 988907,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940495893.283, "dur": 223.480, + "args": { + "External id": 988908,"Record function id": 0, "Sequence number": 10552581, "Fwd thread id": 1, "Ev Idx": 6891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940495894.205, "dur": 211.764, + "args": { + "External id": 988909,"Sequence number": 10552581, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6892 + } + }, + { + "ph": "f", "id": 338, "pid": 2338710, "tid": 2379450, "ts": 6345940495894.205, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940495899.900, "dur": 4.650, + "args": { + "External id": 988910,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940495901.173, "dur": 2.674, + "args": { + "External id": 988911,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 6894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940495902.795, "dur": 0.873, + "args": { + "External id": 988912,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 6895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940495905.580, "dur": 69.615, + "args": { + "External id": 988913,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 6896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940495976.614, "dur": 8.841, + "args": { + "External id": 988914,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940495977.673, "dur": 6.981, + "args": { + "External id": 988915,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940495981.658, "dur": 2.746, + "args": { + "External id": 988916,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940495987.181, "dur": 3.853, + "args": { + "External id": 988917,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940495988.510, "dur": 2.003, + "args": { + "External id": 988918,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940495989.874, "dur": 0.516, + "args": { + "External id": 988919,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940495991.768, "dur": 112.066, + "args": { + "External id": 988920,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 6903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940496126.047, "dur": 10.663, + "args": { + "External id": 988921,"Record function id": 0, "Sequence number": 10552580, "Fwd thread id": 1, "Ev Idx": 6904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940496127.542, "dur": 6.834, + "args": { + "External id": 988922,"Sequence number": 10552580, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6905 + } + }, + { + "ph": "f", "id": 339, "pid": 2338710, "tid": 2379450, "ts": 6345940496127.542, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940496130.291, "dur": 3.918, + "args": { + "External id": 988923,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940496132.148, "dur": 1.912, + "args": { + "External id": 988924,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940496141.163, "dur": 10.692, + "args": { + "External id": 988925,"Record function id": 0, "Sequence number": 10552579, "Fwd thread id": 1, "Ev Idx": 6908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940496142.572, "dur": 7.103, + "args": { + "External id": 988926,"Sequence number": 10552579, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6909 + } + }, + { + "ph": "f", "id": 340, "pid": 2338710, "tid": 2379450, "ts": 6345940496142.572, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940496143.379, "dur": 6.036, + "args": { + "External id": 988927,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940496144.403, "dur": 4.389, + "args": { + "External id": 988928,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940496148.013, "dur": 0.642, + "args": { + "External id": 988929,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940496157.183, "dur": 7.452, + "args": { + "External id": 988930,"Record function id": 0, "Ev Idx": 6913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940496158.849, "dur": 5.146, + "args": { + "External id": 988931,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940496160.721, "dur": 2.949, + "args": { + "External id": 988932,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940496161.737, "dur": 1.787, + "args": { + "External id": 988933,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940496168.533, "dur": 6.820, + "args": { + "External id": 988934,"Record function id": 0, "Sequence number": 10552578, "Fwd thread id": 1, "Ev Idx": 6917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940496169.523, "dur": 3.767, + "args": { + "External id": 988935,"Sequence number": 10552578, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6918 + } + }, + { + "ph": "f", "id": 341, "pid": 2338710, "tid": 2379450, "ts": 6345940496169.523, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940496170.475, "dur": 2.655, + "args": { + "External id": 988936,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940496171.598, "dur": 1.366, + "args": { + "External id": 988937,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940496181.328, "dur": 442.608, + "args": { + "External id": 988938,"Record function id": 0, "Sequence number": 10552577, "Fwd thread id": 1, "Ev Idx": 6921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940496182.628, "dur": 422.499, + "args": { + "External id": 988939,"Sequence number": 10552577, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6922 + } + }, + { + "ph": "f", "id": 342, "pid": 2338710, "tid": 2379450, "ts": 6345940496182.628, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2379450, + "ts": 6345940496201.537, "dur": 9.343, + "args": { + "External id": 988940,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 6923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940496205.469, "dur": 4.911, + "args": { + "External id": 988941,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2379450, + "ts": 6345940496213.238, "dur": 3.818, + "args": { + "External id": 988942,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 6925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940496215.114, "dur": 1.702, + "args": { + "External id": 988943,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2379450, + "ts": 6345940496218.602, "dur": 5.517, + "args": { + "External id": 988944,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 6927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940496220.733, "dur": 3.150, + "args": { + "External id": 988945,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345940496253.758, "dur": 322.598, + "args": { + "External id": 988946,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 6929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940496350.093, "dur": 6.520, + "args": { + "External id": 988947,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940496358.781, "dur": 3.038, + "args": { + "External id": 988948,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940496363.364, "dur": 3.192, + "args": { + "External id": 988949,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940496367.856, "dur": 2.306, + "args": { + "External id": 988950,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940496463.578, "dur": 3.366, + "args": { + "External id": 988951,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940496465.355, "dur": 1.442, + "args": { + "External id": 988952,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345940496469.123, "dur": 32.640, + "args": { + "External id": 988953,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 6936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940496477.301, "dur": 0.991, + "args": { + "External id": 988954,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 6937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940496506.116, "dur": 2.415, + "args": { + "External id": 988955,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940496507.684, "dur": 0.730, + "args": { + "External id": 988956,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345940496509.423, "dur": 17.927, + "args": { + "External id": 988957,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 6940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940496512.730, "dur": 0.648, + "args": { + "External id": 988958,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 6941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338710, "tid": 2379450, + "ts": 6345940496590.536, "dur": 4.043, + "args": { + "External id": 988959,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338710, "tid": 2379450, + "ts": 6345940496598.090, "dur": 0.814, + "args": { + "External id": 988960,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338710, "tid": 2379450, + "ts": 6345940496601.402, "dur": 0.754, + "args": { + "External id": 988961,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940496633.417, "dur": 271.602, + "args": { + "External id": 988962,"Record function id": 0, "Sequence number": 10552576, "Fwd thread id": 1, "Ev Idx": 6945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940496635.198, "dur": 262.650, + "args": { + "External id": 988963,"Sequence number": 10552576, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6946 + } + }, + { + "ph": "f", "id": 343, "pid": 2338710, "tid": 2379450, "ts": 6345940496635.198, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338710, "tid": 2379450, + "ts": 6345940496658.579, "dur": 56.327, + "args": { + "External id": 988964,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940496662.871, "dur": 3.891, + "args": { + "External id": 988965,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940496668.510, "dur": 45.707, + "args": { + "External id": 988966,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], [8, 4096, 8, 128], []], "Ev Idx": 6949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2379450, + "ts": 6345940496726.290, "dur": 8.047, + "args": { + "External id": 988967,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 6950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940496728.794, "dur": 5.217, + "args": { + "External id": 988968,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940496913.453, "dur": 284.489, + "args": { + "External id": 988969,"Record function id": 0, "Sequence number": 10552575, "Fwd thread id": 1, "Ev Idx": 6952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940496915.519, "dur": 272.425, + "args": { + "External id": 988970,"Sequence number": 10552575, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6953 + } + }, + { + "ph": "f", "id": 344, "pid": 2338710, "tid": 2379450, "ts": 6345940496915.519, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338710, "tid": 2379450, + "ts": 6345940496928.373, "dur": 53.561, + "args": { + "External id": 988971,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940496935.631, "dur": 3.100, + "args": { + "External id": 988972,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940496942.986, "dur": 38.294, + "args": { + "External id": 988973,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], []], "Ev Idx": 6956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2379450, + "ts": 6345940496990.132, "dur": 6.503, + "args": { + "External id": 988974,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 6957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940496992.840, "dur": 3.477, + "args": { + "External id": 988975,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940497209.820, "dur": 20.372, + "args": { + "External id": 988976,"Record function id": 0, "Sequence number": 10552574, "Fwd thread id": 1, "Ev Idx": 6959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940497211.724, "dur": 14.892, + "args": { + "External id": 988977,"Sequence number": 10552574, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6960 + } + }, + { + "ph": "f", "id": 345, "pid": 2338710, "tid": 2379450, "ts": 6345940497211.724, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940497215.628, "dur": 10.678, + "args": { + "External id": 988978,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940497217.765, "dur": 8.318, + "args": { + "External id": 988979,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940497234.982, "dur": 7.844, + "args": { + "External id": 988980,"Record function id": 0, "Sequence number": 10552573, "Fwd thread id": 1, "Ev Idx": 6963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940497236.079, "dur": 4.232, + "args": { + "External id": 988981,"Sequence number": 10552573, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6964 + } + }, + { + "ph": "f", "id": 346, "pid": 2338710, "tid": 2379450, "ts": 6345940497236.079, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940497237.827, "dur": 2.304, + "args": { + "External id": 988982,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940497238.963, "dur": 1.041, + "args": { + "External id": 988983,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940497246.851, "dur": 9.605, + "args": { + "External id": 988984,"Record function id": 0, "Sequence number": 10552572, "Fwd thread id": 1, "Ev Idx": 6967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940497247.624, "dur": 6.427, + "args": { + "External id": 988985,"Sequence number": 10552572, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6968 + } + }, + { + "ph": "f", "id": 347, "pid": 2338710, "tid": 2379450, "ts": 6345940497247.624, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940497249.130, "dur": 4.744, + "args": { + "External id": 988986,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940497252.723, "dur": 1.028, + "args": { + "External id": 988987,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940497260.781, "dur": 8.387, + "args": { + "External id": 988988,"Record function id": 0, "Sequence number": 10552571, "Fwd thread id": 1, "Ev Idx": 6971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940497262.892, "dur": 3.881, + "args": { + "External id": 988989,"Sequence number": 10552571, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 6972 + } + }, + { + "ph": "f", "id": 348, "pid": 2338710, "tid": 2379450, "ts": 6345940497262.892, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940497264.564, "dur": 2.029, + "args": { + "External id": 988990,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940497265.312, "dur": 1.183, + "args": { + "External id": 988991,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940497273.279, "dur": 193.355, + "args": { + "External id": 988992,"Record function id": 0, "Sequence number": 10552570, "Fwd thread id": 1, "Ev Idx": 6975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940497274.147, "dur": 183.532, + "args": { + "External id": 988993,"Sequence number": 10552570, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6976 + } + }, + { + "ph": "f", "id": 349, "pid": 2338710, "tid": 2379450, "ts": 6345940497274.147, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940497278.423, "dur": 9.869, + "args": { + "External id": 988994,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940497282.731, "dur": 4.641, + "args": { + "External id": 988995,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 6978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940497285.198, "dur": 1.873, + "args": { + "External id": 988996,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 6979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940497290.445, "dur": 90.672, + "args": { + "External id": 988997,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 6980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940497382.521, "dur": 5.824, + "args": { + "External id": 988998,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940497383.454, "dur": 4.024, + "args": { + "External id": 988999,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 6982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940497385.621, "dur": 1.672, + "args": { + "External id": 989000,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 6983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940497390.405, "dur": 6.130, + "args": { + "External id": 989001,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940497393.871, "dur": 2.108, + "args": { + "External id": 989002,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940497395.195, "dur": 0.708, + "args": { + "External id": 989003,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940497397.541, "dur": 59.263, + "args": { + "External id": 989004,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 6987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940497473.012, "dur": 8.207, + "args": { + "External id": 989005,"Record function id": 0, "Sequence number": 10552569, "Fwd thread id": 1, "Ev Idx": 6988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940497474.082, "dur": 4.866, + "args": { + "External id": 989006,"Sequence number": 10552569, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6989 + } + }, + { + "ph": "f", "id": 350, "pid": 2338710, "tid": 2379450, "ts": 6345940497474.082, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940497476.293, "dur": 2.495, + "args": { + "External id": 989007,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940497477.262, "dur": 1.420, + "args": { + "External id": 989008,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940497485.095, "dur": 11.022, + "args": { + "External id": 989009,"Record function id": 0, "Sequence number": 10552568, "Fwd thread id": 1, "Ev Idx": 6992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940497486.222, "dur": 7.935, + "args": { + "External id": 989010,"Sequence number": 10552568, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6993 + } + }, + { + "ph": "f", "id": 351, "pid": 2338710, "tid": 2379450, "ts": 6345940497486.222, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940497487.431, "dur": 6.443, + "args": { + "External id": 989011,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940497488.607, "dur": 4.626, + "args": { + "External id": 989012,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940497492.549, "dur": 0.550, + "args": { + "External id": 989013,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940497503.272, "dur": 11.524, + "args": { + "External id": 989014,"Record function id": 0, "Ev Idx": 6997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940497505.659, "dur": 8.147, + "args": { + "External id": 989015,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940497508.723, "dur": 4.669, + "args": { + "External id": 989016,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940497510.478, "dur": 2.789, + "args": { + "External id": 989017,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940497518.802, "dur": 7.546, + "args": { + "External id": 989018,"Record function id": 0, "Sequence number": 10552567, "Fwd thread id": 1, "Ev Idx": 7001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940497520.426, "dur": 3.783, + "args": { + "External id": 989019,"Sequence number": 10552567, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 7002 + } + }, + { + "ph": "f", "id": 352, "pid": 2338710, "tid": 2379450, "ts": 6345940497520.426, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940497521.414, "dur": 2.636, + "args": { + "External id": 989020,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 7003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940497522.764, "dur": 1.182, + "args": { + "External id": 989021,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 7004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940497530.241, "dur": 105.063, + "args": { + "External id": 989022,"Record function id": 0, "Sequence number": 10552566, "Fwd thread id": 1, "Ev Idx": 7005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940497533.817, "dur": 94.284, + "args": { + "External id": 989023,"Sequence number": 10552566, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 7006 + } + }, + { + "ph": "f", "id": 353, "pid": 2338710, "tid": 2379450, "ts": 6345940497533.817, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940497536.015, "dur": 5.782, + "args": { + "External id": 989024,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 7007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940497536.746, "dur": 4.542, + "args": { + "External id": 989025,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 7008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940497538.057, "dur": 3.095, + "args": { + "External id": 989026,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 7009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940497542.473, "dur": 31.200, + "args": { + "External id": 989027,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 7010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940497574.919, "dur": 6.838, + "args": { + "External id": 989028,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940497575.746, "dur": 5.318, + "args": { + "External id": 989029,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 7012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940497579.897, "dur": 1.018, + "args": { + "External id": 989030,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 7013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940497583.333, "dur": 3.706, + "args": { + "External id": 989031,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 7014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940497584.660, "dur": 1.848, + "args": { + "External id": 989032,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 7015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940497586.063, "dur": 0.348, + "args": { + "External id": 989033,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 7016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940497587.836, "dur": 39.456, + "args": { + "External id": 989034,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 7017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940497641.078, "dur": 37.374, + "args": { + "External id": 989035,"Record function id": 0, "Sequence number": 10552565, "Fwd thread id": 1, "Ev Idx": 7018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940497642.310, "dur": 4.671, + "args": { + "External id": 989036,"Sequence number": 10552565, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7019 + } + }, + { + "ph": "f", "id": 354, "pid": 2338710, "tid": 2379450, "ts": 6345940497642.310, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940497644.020, "dur": 2.802, + "args": { + "External id": 989037,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940497645.301, "dur": 1.400, + "args": { + "External id": 989038,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338710, "tid": 2379450, + "ts": 6345940497650.529, "dur": 25.077, + "args": { + "External id": 989039,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 7022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940497685.287, "dur": 9.025, + "args": { + "External id": 989040,"Record function id": 0, "Sequence number": 10552564, "Fwd thread id": 1, "Ev Idx": 7023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940497686.333, "dur": 5.757, + "args": { + "External id": 989041,"Sequence number": 10552564, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 7024 + } + }, + { + "ph": "f", "id": 355, "pid": 2338710, "tid": 2379450, "ts": 6345940497686.333, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940497687.160, "dur": 4.680, + "args": { + "External id": 989042,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 7025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940497688.262, "dur": 2.859, + "args": { + "External id": 989043,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 7026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940497690.271, "dur": 0.701, + "args": { + "External id": 989044,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 7027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940497698.850, "dur": 8.365, + "args": { + "External id": 989045,"Record function id": 0, "Ev Idx": 7028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940497700.548, "dur": 6.103, + "args": { + "External id": 989046,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940497701.995, "dur": 4.282, + "args": { + "External id": 989047,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940497702.546, "dur": 3.631, + "args": { + "External id": 989048,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940497711.163, "dur": 8.559, + "args": { + "External id": 989049,"Record function id": 0, "Sequence number": 10552563, "Fwd thread id": 1, "Ev Idx": 7032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940497712.296, "dur": 5.344, + "args": { + "External id": 989050,"Sequence number": 10552563, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7033 + } + }, + { + "ph": "f", "id": 356, "pid": 2338710, "tid": 2379450, "ts": 6345940497712.296, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940497713.611, "dur": 3.868, + "args": { + "External id": 989051,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940497716.236, "dur": 1.124, + "args": { + "External id": 989052,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940497723.905, "dur": 106.656, + "args": { + "External id": 989053,"Record function id": 0, "Sequence number": 10552562, "Fwd thread id": 1, "Ev Idx": 7036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940497724.873, "dur": 97.162, + "args": { + "External id": 989054,"Sequence number": 10552562, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7037 + } + }, + { + "ph": "f", "id": 357, "pid": 2338710, "tid": 2379450, "ts": 6345940497724.873, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940497726.853, "dur": 2.788, + "args": { + "External id": 989055,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940497727.300, "dur": 1.845, + "args": { + "External id": 989056,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 7039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940497728.506, "dur": 0.510, + "args": { + "External id": 989057,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 7040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940497730.575, "dur": 39.371, + "args": { + "External id": 989058,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 7041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940497773.697, "dur": 4.127, + "args": { + "External id": 989059,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940497774.422, "dur": 2.728, + "args": { + "External id": 989060,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 7043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940497776.004, "dur": 1.002, + "args": { + "External id": 989061,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 7044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940497779.020, "dur": 3.437, + "args": { + "External id": 989062,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 7045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940497779.964, "dur": 1.736, + "args": { + "External id": 989063,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 7046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940497781.219, "dur": 0.382, + "args": { + "External id": 989064,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 7047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940497785.591, "dur": 35.610, + "args": { + "External id": 989065,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 7048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940497836.472, "dur": 28.363, + "args": { + "External id": 989066,"Record function id": 0, "Sequence number": 10552561, "Fwd thread id": 1, "Ev Idx": 7049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940497837.691, "dur": 3.971, + "args": { + "External id": 989067,"Sequence number": 10552561, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7050 + } + }, + { + "ph": "f", "id": 358, "pid": 2338710, "tid": 2379450, "ts": 6345940497837.691, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940497839.113, "dur": 2.391, + "args": { + "External id": 989068,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940497840.168, "dur": 1.225, + "args": { + "External id": 989069,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345940497844.506, "dur": 17.620, + "args": { + "External id": 989070,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 7053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940497869.234, "dur": 12.242, + "args": { + "External id": 989071,"Record function id": 0, "Sequence number": 10552560, "Fwd thread id": 1, "Ev Idx": 7054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940497870.231, "dur": 8.700, + "args": { + "External id": 989072,"Sequence number": 10552560, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 7055 + } + }, + { + "ph": "f", "id": 359, "pid": 2338710, "tid": 2379450, "ts": 6345940497870.231, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940497871.428, "dur": 7.285, + "args": { + "External id": 989073,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 7056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940497872.358, "dur": 5.635, + "args": { + "External id": 989074,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 7057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940497874.471, "dur": 3.370, + "args": { + "External id": 989075,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 7058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940497886.000, "dur": 6.127, + "args": { + "External id": 989076,"Record function id": 0, "Ev Idx": 7059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940497887.925, "dur": 3.580, + "args": { + "External id": 989077,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940497889.047, "dur": 1.750, + "args": { + "External id": 989078,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940497889.638, "dur": 1.055, + "args": { + "External id": 989079,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940497896.983, "dur": 488.830, + "args": { + "External id": 989080,"Record function id": 0, "Sequence number": 10552559, "Fwd thread id": 1, "Ev Idx": 7063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940497898.388, "dur": 447.992, + "args": { + "External id": 989081,"Sequence number": 10552559, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7064 + } + }, + { + "ph": "f", "id": 360, "pid": 2338710, "tid": 2379450, "ts": 6345940497898.388, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940497934.287, "dur": 2.088, + "args": { + "External id": 989082,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940497935.008, "dur": 1.192, + "args": { + "External id": 989083,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940497952.955, "dur": 4.737, + "args": { + "External id": 989084,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940497968.813, "dur": 2.202, + "args": { + "External id": 989085,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940498213.646, "dur": 3.463, + "args": { + "External id": 989086,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 7069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345940498222.550, "dur": 43.962, + "args": { + "External id": 989087,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 7070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940498237.315, "dur": 1.150, + "args": { + "External id": 989088,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 7071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345940498274.117, "dur": 40.406, + "args": { + "External id": 989089,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 7072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345940498278.655, "dur": 35.627, + "args": { + "External id": 989090,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 7073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940498284.375, "dur": 5.371, + "args": { + "External id": 989091,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940498291.801, "dur": 21.936, + "args": { + "External id": 989092,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 7075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2379450, + "ts": 6345940498320.362, "dur": 5.929, + "args": { + "External id": 989093,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 7076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940498322.002, "dur": 4.069, + "args": { + "External id": 989094,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 7077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940498333.869, "dur": 4.916, + "args": { + "External id": 989095,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940498337.386, "dur": 1.289, + "args": { + "External id": 989096,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345940498359.608, "dur": 20.994, + "args": { + "External id": 989097,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 7080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940498401.456, "dur": 10.131, + "args": { + "External id": 989098,"Record function id": 0, "Ev Idx": 7081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940498403.577, "dur": 7.158, + "args": { + "External id": 989099,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940498406.156, "dur": 3.403, + "args": { + "External id": 989100,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940498407.481, "dur": 1.945, + "args": { + "External id": 989101,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940498416.551, "dur": 6.100, + "args": { + "External id": 989102,"Record function id": 0, "Sequence number": 10552558, "Fwd thread id": 1, "Ev Idx": 7085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940498417.962, "dur": 1.442, + "args": { + "External id": 989103,"Sequence number": 10552558, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7086 + } + }, + { + "ph": "f", "id": 361, "pid": 2338710, "tid": 2379450, "ts": 6345940498417.962, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940498427.026, "dur": 480.023, + "args": { + "External id": 989104,"Record function id": 0, "Sequence number": 10552557, "Fwd thread id": 1, "Ev Idx": 7087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940498428.388, "dur": 465.451, + "args": { + "External id": 989105,"Sequence number": 10552557, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7088 + } + }, + { + "ph": "f", "id": 362, "pid": 2338710, "tid": 2379450, "ts": 6345940498428.388, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940498465.223, "dur": 10.322, + "args": { + "External id": 989106,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338710, "tid": 2379450, + "ts": 6345940498471.410, "dur": 3.772, + "args": { + "External id": 989107,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]", "[16384, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[67108864, 16384, 1], [], []], "Input Dims": [[8, 4096, 4096], [], []], "Ev Idx": 7090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940498478.753, "dur": 5.333, + "args": { + "External id": 989108,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940498480.324, "dur": 3.032, + "args": { + "External id": 989109,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 7092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940498482.075, "dur": 1.087, + "args": { + "External id": 989110,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 7093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2379450, + "ts": 6345940498490.846, "dur": 108.936, + "args": { + "External id": 989111,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16384, 1], [1, 14336], []], "Input Dims": [[32768, 4096], [14336, 4096], []], "Ev Idx": 7094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940498491.616, "dur": 3.090, + "args": { + "External id": 989112,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 14336]], "Input Dims": [[14336, 4096]], "Ev Idx": 7095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940498492.195, "dur": 1.931, + "args": { + "External id": 989113,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 14336], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 7096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940498493.209, "dur": 0.753, + "args": { + "External id": 989114,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[14336, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 14336], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 7097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2379450, + "ts": 6345940498496.466, "dur": 102.705, + "args": { + "External id": 989115,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 7098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940498498.195, "dur": 99.887, + "args": { + "External id": 989116,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 7099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2379450, + "ts": 6345940498604.766, "dur": 6.049, + "args": { + "External id": 989117,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [58720256, 14336, 1]], "Input Dims": [[32768, 14336], [8, 4096, 14336]], "Ev Idx": 7100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940498608.503, "dur": 2.141, + "args": { + "External id": 989118,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 7101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940498645.769, "dur": 4.447, + "args": { + "External id": 989119,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940498651.745, "dur": 5.972, + "args": { + "External id": 989120,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940498659.376, "dur": 2.607, + "args": { + "External id": 989121,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940498698.170, "dur": 2.503, + "args": { + "External id": 989122,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 7105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940498699.306, "dur": 1.197, + "args": { + "External id": 989123,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 7106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 2338710, "tid": 2379450, + "ts": 6345940498726.630, "dur": 144.988, + "args": { + "External id": 989124,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[16384, 1], [14336, 1]], []], "Input Dims": [[], [[32768, 4096], [32768, 14336]], []], "Ev Idx": 7107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2379450, + "ts": 6345940498735.550, "dur": 6.684, + "args": { + "External id": 989125,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940498740.350, "dur": 1.062, + "args": { + "External id": 989126,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096, 1]", "[16384, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 7109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338710, "tid": 2379450, + "ts": 6345940498744.168, "dur": 5.739, + "args": { + "External id": 989127,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16384, 1, 1], []], "Input Dims": [[32768, 4096, 1], []], "Ev Idx": 7110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940498748.180, "dur": 0.795, + "args": { + "External id": 989128,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 32768]", "[1, 1, 16384]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1, 1], [], [], []], "Input Dims": [[32768, 4096, 1], [], [], []], "Ev Idx": 7111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2379450, + "ts": 6345940498751.350, "dur": 2.929, + "args": { + "External id": 989129,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 7112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940498753.350, "dur": 0.543, + "args": { + "External id": 989130,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 7113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338710, "tid": 2379450, + "ts": 6345940498755.527, "dur": 3.570, + "args": { + "External id": 989131,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 7114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940498757.591, "dur": 0.881, + "args": { + "External id": 989132,"Record function id": 0, "Concrete Inputs": ["", "[1, 14336, 32768]", "[1, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1, 1], [], [], []], "Input Dims": [[32768, 14336, 1], [], [], []], "Ev Idx": 7115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338710, "tid": 2379450, + "ts": 6345940498767.242, "dur": 5.808, + "args": { + "External id": 989133,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 16384], []], "Input Dims": [[4096, 1, 32768], []], "Ev Idx": 7116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940498772.057, "dur": 0.579, + "args": { + "External id": 989134,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768, 1]", "[1, 16384, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 16384], [], [], []], "Input Dims": [[4096, 1, 32768], [], [], []], "Ev Idx": 7117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940498774.827, "dur": 8.426, + "args": { + "External id": 989135,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 16384, 1], []], "Input Dims": [[4096, 32768, 1], []], "Ev Idx": 7118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338710, "tid": 2379450, + "ts": 6345940498778.843, "dur": 4.214, + "args": { + "External id": 989136,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]", "[4096, 1, 16384]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 16384, 1], [], []], "Input Dims": [[4096, 32768, 1], [], []], "Ev Idx": 7119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338710, "tid": 2379450, + "ts": 6345940498786.434, "dur": 2.836, + "args": { + "External id": 989137,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 14336], []], "Input Dims": [[1, 14336, 32768], []], "Ev Idx": 7120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940498788.490, "dur": 0.423, + "args": { + "External id": 989138,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 14336], [], [], []], "Input Dims": [[1, 14336, 32768], [], [], []], "Ev Idx": 7121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940498790.384, "dur": 2.759, + "args": { + "External id": 989139,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 7122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940498791.363, "dur": 1.661, + "args": { + "External id": 989140,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 7123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345940498794.486, "dur": 63.016, + "args": { + "External id": 989141,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1, 16384], [469762048, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336]], "Ev Idx": 7124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940498859.803, "dur": 1.218, + "args": { + "External id": 989142,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[1, 4096, 14336], []], "Ev Idx": 7125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338710, "tid": 2379450, + "ts": 6345940498862.190, "dur": 3.954, + "args": { + "External id": 989143,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 14336, 1], []], "Input Dims": [[4096, 1, 14336], []], "Ev Idx": 7126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940498864.934, "dur": 0.487, + "args": { + "External id": 989144,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336, 1]", "[14336, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 14336, 1], [], [], []], "Input Dims": [[4096, 1, 14336], [], [], []], "Ev Idx": 7127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940498869.418, "dur": 1.059, + "args": { + "External id": 989145,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 14336], []], "Input Dims": [[4096, 14336, 1], []], "Ev Idx": 7128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940498919.549, "dur": 10.232, + "args": { + "External id": 989146,"Record function id": 0, "Ev Idx": 7129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940498922.090, "dur": 6.813, + "args": { + "External id": 989147,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940498924.137, "dur": 3.586, + "args": { + "External id": 989148,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940498925.467, "dur": 2.108, + "args": { + "External id": 989149,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940498934.458, "dur": 8.649, + "args": { + "External id": 989150,"Record function id": 0, "Sequence number": 10552556, "Fwd thread id": 1, "Ev Idx": 7133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940498935.951, "dur": 4.568, + "args": { + "External id": 989151,"Sequence number": 10552556, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 7134 + } + }, + { + "ph": "f", "id": 363, "pid": 2338710, "tid": 2379450, "ts": 6345940498935.951, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940498937.851, "dur": 2.419, + "args": { + "External id": 989152,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 7135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940498939.022, "dur": 1.069, + "args": { + "External id": 989153,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 7136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940498947.183, "dur": 211.596, + "args": { + "External id": 989154,"Record function id": 0, "Sequence number": 10552555, "Fwd thread id": 1, "Ev Idx": 7137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940498948.235, "dur": 200.181, + "args": { + "External id": 989155,"Sequence number": 10552555, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 7138 + } + }, + { + "ph": "f", "id": 364, "pid": 2338710, "tid": 2379450, "ts": 6345940498948.235, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940498951.182, "dur": 9.195, + "args": { + "External id": 989156,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 7139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940498952.701, "dur": 6.976, + "args": { + "External id": 989157,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 7140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940498956.748, "dur": 2.715, + "args": { + "External id": 989158,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 7141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940498961.524, "dur": 73.706, + "args": { + "External id": 989159,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 7142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940499037.882, "dur": 6.464, + "args": { + "External id": 989160,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940499039.171, "dur": 4.207, + "args": { + "External id": 989161,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 7144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940499041.797, "dur": 1.333, + "args": { + "External id": 989162,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 7145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940499046.278, "dur": 5.649, + "args": { + "External id": 989163,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 7146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940499049.735, "dur": 1.628, + "args": { + "External id": 989164,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 7147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940499050.836, "dur": 0.408, + "args": { + "External id": 989165,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 7148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940499052.611, "dur": 94.080, + "args": { + "External id": 989166,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 7149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940499169.177, "dur": 9.942, + "args": { + "External id": 989167,"Record function id": 0, "Sequence number": 10552554, "Fwd thread id": 1, "Ev Idx": 7150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940499170.677, "dur": 6.703, + "args": { + "External id": 989168,"Sequence number": 10552554, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7151 + } + }, + { + "ph": "f", "id": 365, "pid": 2338710, "tid": 2379450, "ts": 6345940499170.677, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940499173.719, "dur": 3.468, + "args": { + "External id": 989169,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940499175.199, "dur": 1.867, + "args": { + "External id": 989170,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940499183.440, "dur": 10.296, + "args": { + "External id": 989171,"Record function id": 0, "Sequence number": 10552553, "Fwd thread id": 1, "Ev Idx": 7154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940499184.294, "dur": 7.270, + "args": { + "External id": 989172,"Sequence number": 10552553, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 7155 + } + }, + { + "ph": "f", "id": 366, "pid": 2338710, "tid": 2379450, "ts": 6345940499184.294, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940499185.142, "dur": 6.166, + "args": { + "External id": 989173,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 7156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940499188.377, "dur": 2.347, + "args": { + "External id": 989174,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 7157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940499190.052, "dur": 0.549, + "args": { + "External id": 989175,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 7158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940499198.893, "dur": 9.327, + "args": { + "External id": 989176,"Record function id": 0, "Ev Idx": 7159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940499200.511, "dur": 7.061, + "args": { + "External id": 989177,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940499202.370, "dur": 4.821, + "args": { + "External id": 989178,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940499203.325, "dur": 3.739, + "args": { + "External id": 989179,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940499212.056, "dur": 6.595, + "args": { + "External id": 989180,"Record function id": 0, "Sequence number": 10552552, "Fwd thread id": 1, "Ev Idx": 7163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940499213.089, "dur": 3.395, + "args": { + "External id": 989181,"Sequence number": 10552552, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 7164 + } + }, + { + "ph": "f", "id": 367, "pid": 2338710, "tid": 2379450, "ts": 6345940499213.089, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940499214.586, "dur": 1.703, + "args": { + "External id": 989182,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 7165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940499215.211, "dur": 0.866, + "args": { + "External id": 989183,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 7166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940499222.962, "dur": 135.697, + "args": { + "External id": 989184,"Record function id": 0, "Sequence number": 10552551, "Fwd thread id": 1, "Ev Idx": 7167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940499251.522, "dur": 99.183, + "args": { + "External id": 989185,"Sequence number": 10552551, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 7168 + } + }, + { + "ph": "f", "id": 368, "pid": 2338710, "tid": 2379450, "ts": 6345940499251.522, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940499254.247, "dur": 3.600, + "args": { + "External id": 989186,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 7169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940499254.922, "dur": 2.297, + "args": { + "External id": 989187,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 7170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940499256.604, "dur": 0.466, + "args": { + "External id": 989188,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 7171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940499258.827, "dur": 39.398, + "args": { + "External id": 989189,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 7172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940499299.798, "dur": 8.796, + "args": { + "External id": 989190,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940499300.317, "dur": 7.595, + "args": { + "External id": 989191,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 7174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940499304.404, "dur": 3.331, + "args": { + "External id": 989192,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 7175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940499309.735, "dur": 3.620, + "args": { + "External id": 989193,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 7176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940499310.610, "dur": 2.161, + "args": { + "External id": 989194,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 7177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940499312.017, "dur": 0.652, + "args": { + "External id": 989195,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 7178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940499313.824, "dur": 36.157, + "args": { + "External id": 989196,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 7179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940499364.233, "dur": 39.248, + "args": { + "External id": 989197,"Record function id": 0, "Sequence number": 10552550, "Fwd thread id": 1, "Ev Idx": 7180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940499365.278, "dur": 4.333, + "args": { + "External id": 989198,"Sequence number": 10552550, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7181 + } + }, + { + "ph": "f", "id": 369, "pid": 2338710, "tid": 2379450, "ts": 6345940499365.278, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940499367.262, "dur": 2.161, + "args": { + "External id": 989199,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940499368.150, "dur": 1.141, + "args": { + "External id": 989200,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338710, "tid": 2379450, + "ts": 6345940499373.114, "dur": 26.500, + "args": { + "External id": 989201,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 7184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940499408.123, "dur": 9.951, + "args": { + "External id": 989202,"Record function id": 0, "Sequence number": 10552549, "Fwd thread id": 1, "Ev Idx": 7185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940499411.615, "dur": 4.857, + "args": { + "External id": 989203,"Sequence number": 10552549, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 7186 + } + }, + { + "ph": "f", "id": 370, "pid": 2338710, "tid": 2379450, "ts": 6345940499411.615, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940499412.509, "dur": 3.735, + "args": { + "External id": 989204,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 7187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940499413.274, "dur": 2.356, + "args": { + "External id": 989205,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 7188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940499414.872, "dur": 0.573, + "args": { + "External id": 989206,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 7189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940499422.985, "dur": 5.207, + "args": { + "External id": 989207,"Record function id": 0, "Ev Idx": 7190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940499424.183, "dur": 3.439, + "args": { + "External id": 989208,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940499425.378, "dur": 1.737, + "args": { + "External id": 989209,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940499426.024, "dur": 0.947, + "args": { + "External id": 989210,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940499433.118, "dur": 490.303, + "args": { + "External id": 989211,"Record function id": 0, "Sequence number": 10552548, "Fwd thread id": 1, "Ev Idx": 7194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940499437.690, "dur": 447.782, + "args": { + "External id": 989212,"Sequence number": 10552548, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 7195 + } + }, + { + "ph": "f", "id": 371, "pid": 2338710, "tid": 2379450, "ts": 6345940499437.690, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338710, "tid": 2379450, + "ts": 6345940499468.902, "dur": 37.857, + "args": { + "External id": 989213,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338710, "tid": 2379450, + "ts": 6345940499470.572, "dur": 35.965, + "args": { + "External id": 989214,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2379450, + "ts": 6345940499473.698, "dur": 7.291, + "args": { + "External id": 989215,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 7198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940499476.751, "dur": 3.595, + "args": { + "External id": 989216,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940499482.487, "dur": 23.545, + "args": { + "External id": 989217,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 7200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940499522.612, "dur": 2.333, + "args": { + "External id": 989218,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940499523.394, "dur": 1.369, + "args": { + "External id": 989219,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940499529.695, "dur": 1.382, + "args": { + "External id": 989220,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940499530.123, "dur": 0.842, + "args": { + "External id": 989221,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940499547.533, "dur": 2.714, + "args": { + "External id": 989222,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940499563.088, "dur": 2.561, + "args": { + "External id": 989223,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940499758.222, "dur": 6.829, + "args": { + "External id": 989224,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 7207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345940499770.266, "dur": 38.031, + "args": { + "External id": 989225,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 7208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940499784.478, "dur": 0.879, + "args": { + "External id": 989226,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 7209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345940499815.079, "dur": 34.047, + "args": { + "External id": 989227,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 7210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345940499817.106, "dur": 31.772, + "args": { + "External id": 989228,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 7211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940499822.667, "dur": 5.113, + "args": { + "External id": 989229,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940499831.882, "dur": 16.420, + "args": { + "External id": 989230,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 7213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2379450, + "ts": 6345940499854.280, "dur": 3.173, + "args": { + "External id": 989231,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 7214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940499856.053, "dur": 1.252, + "args": { + "External id": 989232,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 7215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940499865.171, "dur": 2.809, + "args": { + "External id": 989233,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940499866.516, "dur": 1.335, + "args": { + "External id": 989234,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940499870.700, "dur": 2.666, + "args": { + "External id": 989235,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940499872.237, "dur": 1.028, + "args": { + "External id": 989236,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345940499902.902, "dur": 18.824, + "args": { + "External id": 989237,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 7220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940499934.642, "dur": 8.534, + "args": { + "External id": 989238,"Record function id": 0, "Ev Idx": 7221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940499936.943, "dur": 5.557, + "args": { + "External id": 989239,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940499939.053, "dur": 2.525, + "args": { + "External id": 989240,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940499940.022, "dur": 1.405, + "args": { + "External id": 989241,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940499947.157, "dur": 9.124, + "args": { + "External id": 989242,"Record function id": 0, "Sequence number": 10552547, "Fwd thread id": 1, "Ev Idx": 7225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940499948.733, "dur": 4.489, + "args": { + "External id": 989243,"Sequence number": 10552547, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7226 + } + }, + { + "ph": "f", "id": 372, "pid": 2338710, "tid": 2379450, "ts": 6345940499948.733, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940499950.366, "dur": 2.646, + "args": { + "External id": 989244,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940499951.328, "dur": 1.488, + "args": { + "External id": 989245,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940499960.047, "dur": 246.214, + "args": { + "External id": 989246,"Record function id": 0, "Sequence number": 10552546, "Fwd thread id": 1, "Ev Idx": 7229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940499961.098, "dur": 235.104, + "args": { + "External id": 989247,"Sequence number": 10552546, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7230 + } + }, + { + "ph": "f", "id": 373, "pid": 2338710, "tid": 2379450, "ts": 6345940499961.098, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940499964.344, "dur": 8.043, + "args": { + "External id": 989248,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940499968.452, "dur": 3.265, + "args": { + "External id": 989249,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 7232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940499970.461, "dur": 1.065, + "args": { + "External id": 989250,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 7233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940499973.770, "dur": 145.516, + "args": { + "External id": 989251,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 7234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940500122.881, "dur": 9.217, + "args": { + "External id": 989252,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940500124.048, "dur": 6.853, + "args": { + "External id": 989253,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 7236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940500126.730, "dur": 3.976, + "args": { + "External id": 989254,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 7237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940500134.274, "dur": 8.673, + "args": { + "External id": 989255,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 7238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940500138.116, "dur": 4.214, + "args": { + "External id": 989256,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 7239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940500139.491, "dur": 2.760, + "args": { + "External id": 989257,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 7240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940500143.933, "dur": 51.226, + "args": { + "External id": 989258,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 7241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940500214.752, "dur": 9.435, + "args": { + "External id": 989259,"Record function id": 0, "Sequence number": 10552545, "Fwd thread id": 1, "Ev Idx": 7242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940500216.317, "dur": 5.789, + "args": { + "External id": 989260,"Sequence number": 10552545, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7243 + } + }, + { + "ph": "f", "id": 374, "pid": 2338710, "tid": 2379450, "ts": 6345940500216.317, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940500218.621, "dur": 3.330, + "args": { + "External id": 989261,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940500220.240, "dur": 1.605, + "args": { + "External id": 989262,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940500228.145, "dur": 11.976, + "args": { + "External id": 989263,"Record function id": 0, "Sequence number": 10552544, "Fwd thread id": 1, "Ev Idx": 7246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940500229.280, "dur": 8.184, + "args": { + "External id": 989264,"Sequence number": 10552544, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 7247 + } + }, + { + "ph": "f", "id": 375, "pid": 2338710, "tid": 2379450, "ts": 6345940500229.280, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940500230.008, "dur": 7.215, + "args": { + "External id": 989265,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 7248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940500230.960, "dur": 5.655, + "args": { + "External id": 989266,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 7249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940500236.089, "dur": 0.418, + "args": { + "External id": 989267,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 7250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940500245.220, "dur": 7.862, + "args": { + "External id": 989268,"Record function id": 0, "Ev Idx": 7251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940500247.302, "dur": 5.146, + "args": { + "External id": 989269,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940500249.517, "dur": 2.624, + "args": { + "External id": 989270,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940500250.602, "dur": 1.392, + "args": { + "External id": 989271,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940500256.734, "dur": 7.036, + "args": { + "External id": 989272,"Record function id": 0, "Sequence number": 10552543, "Fwd thread id": 1, "Ev Idx": 7255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940500257.834, "dur": 3.927, + "args": { + "External id": 989273,"Sequence number": 10552543, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7256 + } + }, + { + "ph": "f", "id": 376, "pid": 2338710, "tid": 2379450, "ts": 6345940500257.834, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940500259.068, "dur": 2.530, + "args": { + "External id": 989274,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940500260.126, "dur": 1.368, + "args": { + "External id": 989275,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940500268.758, "dur": 407.397, + "args": { + "External id": 989276,"Record function id": 0, "Sequence number": 10552542, "Fwd thread id": 1, "Ev Idx": 7259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940500270.516, "dur": 386.255, + "args": { + "External id": 989277,"Sequence number": 10552542, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 7260 + } + }, + { + "ph": "f", "id": 377, "pid": 2338710, "tid": 2379450, "ts": 6345940500270.516, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2379450, + "ts": 6345940500288.968, "dur": 9.064, + "args": { + "External id": 989278,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 7261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940500292.752, "dur": 4.766, + "args": { + "External id": 989279,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2379450, + "ts": 6345940500300.515, "dur": 4.614, + "args": { + "External id": 989280,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 7263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940500302.584, "dur": 2.300, + "args": { + "External id": 989281,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2379450, + "ts": 6345940500306.959, "dur": 6.119, + "args": { + "External id": 989282,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 7265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940500308.712, "dur": 4.136, + "args": { + "External id": 989283,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345940500343.978, "dur": 283.844, + "args": { + "External id": 989284,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 7267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940500445.166, "dur": 4.042, + "args": { + "External id": 989285,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940500451.714, "dur": 2.598, + "args": { + "External id": 989286,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940500455.592, "dur": 2.220, + "args": { + "External id": 989287,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940500459.135, "dur": 2.437, + "args": { + "External id": 989288,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940500514.634, "dur": 5.384, + "args": { + "External id": 989289,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940500518.360, "dur": 1.556, + "args": { + "External id": 989290,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345940500522.552, "dur": 30.958, + "args": { + "External id": 989291,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940500528.816, "dur": 1.185, + "args": { + "External id": 989292,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940500557.798, "dur": 2.123, + "args": { + "External id": 989293,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940500559.263, "dur": 0.568, + "args": { + "External id": 989294,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345940500561.002, "dur": 17.915, + "args": { + "External id": 989295,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940500563.959, "dur": 0.665, + "args": { + "External id": 989296,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338710, "tid": 2379450, + "ts": 6345940500642.687, "dur": 3.838, + "args": { + "External id": 989297,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 7280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338710, "tid": 2379450, + "ts": 6345940500649.884, "dur": 0.640, + "args": { + "External id": 989298,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 7281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338710, "tid": 2379450, + "ts": 6345940500653.037, "dur": 0.600, + "args": { + "External id": 989299,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 7282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940500684.447, "dur": 264.222, + "args": { + "External id": 989300,"Record function id": 0, "Sequence number": 10552541, "Fwd thread id": 1, "Ev Idx": 7283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940500686.275, "dur": 253.793, + "args": { + "External id": 989301,"Sequence number": 10552541, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 7284 + } + }, + { + "ph": "f", "id": 378, "pid": 2338710, "tid": 2379450, "ts": 6345940500686.275, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338710, "tid": 2379450, + "ts": 6345940500709.122, "dur": 53.113, + "args": { + "External id": 989302,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 7285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940500712.962, "dur": 6.507, + "args": { + "External id": 989303,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940500721.195, "dur": 40.328, + "args": { + "External id": 989304,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], [8, 4096, 8, 128], []], "Ev Idx": 7287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2379450, + "ts": 6345940500773.785, "dur": 5.104, + "args": { + "External id": 989305,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 7288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940500776.225, "dur": 2.371, + "args": { + "External id": 989306,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940500956.646, "dur": 276.418, + "args": { + "External id": 989307,"Record function id": 0, "Sequence number": 10552540, "Fwd thread id": 1, "Ev Idx": 7290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940500958.680, "dur": 264.987, + "args": { + "External id": 989308,"Sequence number": 10552540, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 7291 + } + }, + { + "ph": "f", "id": 379, "pid": 2338710, "tid": 2379450, "ts": 6345940500958.680, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338710, "tid": 2379450, + "ts": 6345940500971.771, "dur": 71.256, + "args": { + "External id": 989309,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940500975.090, "dur": 2.853, + "args": { + "External id": 989310,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940500979.176, "dur": 62.941, + "args": { + "External id": 989311,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], []], "Ev Idx": 7294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2379450, + "ts": 6345940501094.178, "dur": 10.509, + "args": { + "External id": 989312,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 7295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940501098.498, "dur": 5.621, + "args": { + "External id": 989313,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940501243.205, "dur": 15.503, + "args": { + "External id": 989314,"Record function id": 0, "Sequence number": 10552539, "Fwd thread id": 1, "Ev Idx": 7297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940501244.938, "dur": 10.625, + "args": { + "External id": 989315,"Sequence number": 10552539, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 7298 + } + }, + { + "ph": "f", "id": 380, "pid": 2338710, "tid": 2379450, "ts": 6345940501244.938, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940501247.811, "dur": 7.427, + "args": { + "External id": 989316,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 7299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940501249.412, "dur": 5.497, + "args": { + "External id": 989317,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 7300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940501262.877, "dur": 7.803, + "args": { + "External id": 989318,"Record function id": 0, "Sequence number": 10552538, "Fwd thread id": 1, "Ev Idx": 7301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940501263.787, "dur": 4.325, + "args": { + "External id": 989319,"Sequence number": 10552538, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 7302 + } + }, + { + "ph": "f", "id": 381, "pid": 2338710, "tid": 2379450, "ts": 6345940501263.787, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940501265.363, "dur": 2.568, + "args": { + "External id": 989320,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 7303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940501266.643, "dur": 1.141, + "args": { + "External id": 989321,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 7304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940501274.771, "dur": 7.327, + "args": { + "External id": 989322,"Record function id": 0, "Sequence number": 10552537, "Fwd thread id": 1, "Ev Idx": 7305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940501275.553, "dur": 4.191, + "args": { + "External id": 989323,"Sequence number": 10552537, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 7306 + } + }, + { + "ph": "f", "id": 382, "pid": 2338710, "tid": 2379450, "ts": 6345940501275.553, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940501277.247, "dur": 2.325, + "args": { + "External id": 989324,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940501278.615, "dur": 0.847, + "args": { + "External id": 989325,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940501286.221, "dur": 8.158, + "args": { + "External id": 989326,"Record function id": 0, "Sequence number": 10552536, "Fwd thread id": 1, "Ev Idx": 7309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940501287.070, "dur": 5.265, + "args": { + "External id": 989327,"Sequence number": 10552536, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 7310 + } + }, + { + "ph": "f", "id": 383, "pid": 2338710, "tid": 2379450, "ts": 6345940501287.070, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940501288.205, "dur": 3.957, + "args": { + "External id": 989328,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 7311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940501291.107, "dur": 0.935, + "args": { + "External id": 989329,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 7312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940501298.737, "dur": 183.108, + "args": { + "External id": 989330,"Record function id": 0, "Sequence number": 10552535, "Fwd thread id": 1, "Ev Idx": 7313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940501299.972, "dur": 173.989, + "args": { + "External id": 989331,"Sequence number": 10552535, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 7314 + } + }, + { + "ph": "f", "id": 384, "pid": 2338710, "tid": 2379450, "ts": 6345940501299.972, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940501304.093, "dur": 10.543, + "args": { + "External id": 989332,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 7315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940501306.349, "dur": 7.491, + "args": { + "External id": 989333,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 7316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940501308.818, "dur": 4.736, + "args": { + "External id": 989334,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 7317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940501318.549, "dur": 84.103, + "args": { + "External id": 989335,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 7318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940501404.408, "dur": 4.790, + "args": { + "External id": 989336,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940501405.337, "dur": 3.088, + "args": { + "External id": 989337,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 7320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940501407.362, "dur": 0.897, + "args": { + "External id": 989338,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 7321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940501411.042, "dur": 6.488, + "args": { + "External id": 989339,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 7322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940501412.588, "dur": 4.414, + "args": { + "External id": 989340,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 7323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940501416.289, "dur": 0.619, + "args": { + "External id": 989341,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 7324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940501418.478, "dur": 54.315, + "args": { + "External id": 989342,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 7325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940501487.676, "dur": 8.493, + "args": { + "External id": 989343,"Record function id": 0, "Sequence number": 10552534, "Fwd thread id": 1, "Ev Idx": 7326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940501488.852, "dur": 5.077, + "args": { + "External id": 989344,"Sequence number": 10552534, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7327 + } + }, + { + "ph": "f", "id": 385, "pid": 2338710, "tid": 2379450, "ts": 6345940501488.852, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940501491.040, "dur": 2.714, + "args": { + "External id": 989345,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940501492.410, "dur": 1.230, + "args": { + "External id": 989346,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940501500.652, "dur": 10.553, + "args": { + "External id": 989347,"Record function id": 0, "Sequence number": 10552533, "Fwd thread id": 1, "Ev Idx": 7330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940501501.710, "dur": 7.076, + "args": { + "External id": 989348,"Sequence number": 10552533, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 7331 + } + }, + { + "ph": "f", "id": 386, "pid": 2338710, "tid": 2379450, "ts": 6345940501501.710, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940501502.767, "dur": 5.761, + "args": { + "External id": 989349,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 7332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940501505.743, "dur": 2.193, + "args": { + "External id": 989350,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 7333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940501507.232, "dur": 0.580, + "args": { + "External id": 989351,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 7334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940501517.945, "dur": 13.116, + "args": { + "External id": 989352,"Record function id": 0, "Ev Idx": 7335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940501519.869, "dur": 10.285, + "args": { + "External id": 989353,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940501523.237, "dur": 6.510, + "args": { + "External id": 989354,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940501524.318, "dur": 5.292, + "args": { + "External id": 989355,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940501535.059, "dur": 6.069, + "args": { + "External id": 989356,"Record function id": 0, "Sequence number": 10552532, "Fwd thread id": 1, "Ev Idx": 7339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940501536.009, "dur": 3.094, + "args": { + "External id": 989357,"Sequence number": 10552532, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 7340 + } + }, + { + "ph": "f", "id": 387, "pid": 2338710, "tid": 2379450, "ts": 6345940501536.009, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940501537.225, "dur": 1.706, + "args": { + "External id": 989358,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 7341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940501537.892, "dur": 0.885, + "args": { + "External id": 989359,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 7342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940501544.914, "dur": 103.078, + "args": { + "External id": 989360,"Record function id": 0, "Sequence number": 10552531, "Fwd thread id": 1, "Ev Idx": 7343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940501548.716, "dur": 91.917, + "args": { + "External id": 989361,"Sequence number": 10552531, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 7344 + } + }, + { + "ph": "f", "id": 388, "pid": 2338710, "tid": 2379450, "ts": 6345940501548.716, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940501550.883, "dur": 2.835, + "args": { + "External id": 989362,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 7345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940501551.511, "dur": 1.694, + "args": { + "External id": 989363,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 7346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940501552.562, "dur": 0.516, + "args": { + "External id": 989364,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 7347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940501554.518, "dur": 32.161, + "args": { + "External id": 989365,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 7348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940501588.090, "dur": 7.064, + "args": { + "External id": 989366,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940501588.710, "dur": 5.694, + "args": { + "External id": 989367,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 7350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940501592.523, "dur": 1.722, + "args": { + "External id": 989368,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 7351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940501596.392, "dur": 3.135, + "args": { + "External id": 989369,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 7352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940501597.230, "dur": 1.762, + "args": { + "External id": 989370,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 7353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940501598.483, "dur": 0.390, + "args": { + "External id": 989371,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 7354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940501600.213, "dur": 39.579, + "args": { + "External id": 989372,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 7355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940501653.001, "dur": 56.982, + "args": { + "External id": 989373,"Record function id": 0, "Sequence number": 10552530, "Fwd thread id": 1, "Ev Idx": 7356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940501661.925, "dur": 13.927, + "args": { + "External id": 989374,"Sequence number": 10552530, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7357 + } + }, + { + "ph": "f", "id": 389, "pid": 2338710, "tid": 2379450, "ts": 6345940501661.925, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940501673.048, "dur": 2.637, + "args": { + "External id": 989375,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940501674.162, "dur": 1.347, + "args": { + "External id": 989376,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338710, "tid": 2379450, + "ts": 6345940501679.558, "dur": 27.372, + "args": { + "External id": 989377,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 7360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940501714.732, "dur": 10.481, + "args": { + "External id": 989378,"Record function id": 0, "Sequence number": 10552529, "Fwd thread id": 1, "Ev Idx": 7361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940501715.727, "dur": 7.367, + "args": { + "External id": 989379,"Sequence number": 10552529, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 7362 + } + }, + { + "ph": "f", "id": 390, "pid": 2338710, "tid": 2379450, "ts": 6345940501715.727, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940501718.791, "dur": 4.061, + "args": { + "External id": 989380,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 7363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940501719.802, "dur": 2.473, + "args": { + "External id": 989381,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 7364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940501721.590, "dur": 0.536, + "args": { + "External id": 989382,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 7365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940501729.833, "dur": 6.135, + "args": { + "External id": 989383,"Record function id": 0, "Ev Idx": 7366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940501731.305, "dur": 4.067, + "args": { + "External id": 989384,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940501732.491, "dur": 2.293, + "args": { + "External id": 989385,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940501733.447, "dur": 1.221, + "args": { + "External id": 989386,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940501740.071, "dur": 6.790, + "args": { + "External id": 989387,"Record function id": 0, "Sequence number": 10552528, "Fwd thread id": 1, "Ev Idx": 7370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940501741.023, "dur": 3.804, + "args": { + "External id": 989388,"Sequence number": 10552528, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7371 + } + }, + { + "ph": "f", "id": 391, "pid": 2338710, "tid": 2379450, "ts": 6345940501741.023, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940501742.636, "dur": 2.029, + "args": { + "External id": 989389,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940501743.405, "dur": 1.134, + "args": { + "External id": 989390,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940501750.732, "dur": 109.154, + "args": { + "External id": 989391,"Record function id": 0, "Sequence number": 10552527, "Fwd thread id": 1, "Ev Idx": 7374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940501754.365, "dur": 98.810, + "args": { + "External id": 989392,"Sequence number": 10552527, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7375 + } + }, + { + "ph": "f", "id": 392, "pid": 2338710, "tid": 2379450, "ts": 6345940501754.365, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940501756.365, "dur": 3.153, + "args": { + "External id": 989393,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940501757.012, "dur": 1.989, + "args": { + "External id": 989394,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 7377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940501758.206, "dur": 0.675, + "args": { + "External id": 989395,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 7378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940501760.425, "dur": 39.520, + "args": { + "External id": 989396,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 7379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940501801.406, "dur": 6.514, + "args": { + "External id": 989397,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940501802.043, "dur": 5.224, + "args": { + "External id": 989398,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 7381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940501806.446, "dur": 0.685, + "args": { + "External id": 989399,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 7382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940501809.378, "dur": 3.571, + "args": { + "External id": 989400,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 7383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940501810.427, "dur": 1.968, + "args": { + "External id": 989401,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 7384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940501811.827, "dur": 0.490, + "args": { + "External id": 989402,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 7385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940501813.557, "dur": 38.654, + "args": { + "External id": 989403,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 7386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940501864.979, "dur": 34.348, + "args": { + "External id": 989404,"Record function id": 0, "Sequence number": 10552526, "Fwd thread id": 1, "Ev Idx": 7387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940501866.106, "dur": 9.437, + "args": { + "External id": 989405,"Sequence number": 10552526, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7388 + } + }, + { + "ph": "f", "id": 393, "pid": 2338710, "tid": 2379450, "ts": 6345940501866.106, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940501870.466, "dur": 4.884, + "args": { + "External id": 989406,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940501871.527, "dur": 3.638, + "args": { + "External id": 989407,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345940501878.158, "dur": 18.158, + "args": { + "External id": 989408,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 7391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940501904.210, "dur": 7.889, + "args": { + "External id": 989409,"Record function id": 0, "Sequence number": 10552525, "Fwd thread id": 1, "Ev Idx": 7392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338710, "tid": 2379450, + "ts": 6345940501905.246, "dur": 5.105, + "args": { + "External id": 989410,"Sequence number": 10552525, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 7393 + } + }, + { + "ph": "f", "id": 394, "pid": 2338710, "tid": 2379450, "ts": 6345940501905.246, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2379450, + "ts": 6345940501906.374, "dur": 3.732, + "args": { + "External id": 989411,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 7394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2379450, + "ts": 6345940501907.160, "dur": 2.306, + "args": { + "External id": 989412,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 7395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940501908.813, "dur": 0.518, + "args": { + "External id": 989413,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 7396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940501916.812, "dur": 5.490, + "args": { + "External id": 989414,"Record function id": 0, "Ev Idx": 7397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940501918.233, "dur": 3.546, + "args": { + "External id": 989415,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940501919.239, "dur": 2.006, + "args": { + "External id": 989416,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940501919.950, "dur": 1.159, + "args": { + "External id": 989417,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940501926.985, "dur": 514.223, + "args": { + "External id": 989418,"Record function id": 0, "Sequence number": 10552524, "Fwd thread id": 1, "Ev Idx": 7401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940501928.286, "dur": 468.821, + "args": { + "External id": 989419,"Sequence number": 10552524, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7402 + } + }, + { + "ph": "f", "id": 395, "pid": 2338710, "tid": 2379450, "ts": 6345940501928.286, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940501964.219, "dur": 1.867, + "args": { + "External id": 989420,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940501964.873, "dur": 1.058, + "args": { + "External id": 989421,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940501982.026, "dur": 4.320, + "args": { + "External id": 989422,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940501996.569, "dur": 2.558, + "args": { + "External id": 989423,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940502249.954, "dur": 3.959, + "args": { + "External id": 989424,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 7407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345940502263.464, "dur": 51.703, + "args": { + "External id": 989425,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 7408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940502281.335, "dur": 1.357, + "args": { + "External id": 989426,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 7409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345940502322.378, "dur": 41.200, + "args": { + "External id": 989427,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 7410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345940502327.047, "dur": 36.247, + "args": { + "External id": 989428,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 7411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940502332.739, "dur": 5.697, + "args": { + "External id": 989429,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940502340.683, "dur": 22.024, + "args": { + "External id": 989430,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 7413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2379450, + "ts": 6345940502372.901, "dur": 3.106, + "args": { + "External id": 989431,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 7414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940502374.390, "dur": 1.503, + "args": { + "External id": 989432,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 7415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940502383.472, "dur": 5.013, + "args": { + "External id": 989433,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940502387.213, "dur": 1.113, + "args": { + "External id": 989434,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345940502413.986, "dur": 21.030, + "args": { + "External id": 989435,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 7418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940502457.688, "dur": 10.941, + "args": { + "External id": 989436,"Record function id": 0, "Ev Idx": 7419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940502460.568, "dur": 7.097, + "args": { + "External id": 989437,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940502463.243, "dur": 3.370, + "args": { + "External id": 989438,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940502464.574, "dur": 1.894, + "args": { + "External id": 989439,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940502473.330, "dur": 3241.744, + "args": { + "External id": 989440,"Record function id": 0, "Ev Idx": 7423 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.25)", "pid": 2338710, "tid": 2379450, + "ts": 6345940502508.078, "dur": 1090.941, + "args": { + "External id": 989441,"Record function id": 0, "Ev Idx": 7424 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.24", "pid": 2338710, "tid": 2379450, + "ts": 6345940502536.068, "dur": 1052.725, + "args": { + "External id": 989442,"Record function id": 0, "Ev Idx": 7425 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.24)", "pid": 2338710, "tid": 2379450, + "ts": 6345940502553.438, "dur": 1016.861, + "args": { + "External id": 989443,"Record function id": 0, "Ev Idx": 7426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940502642.154, "dur": 5.921, + "args": { + "External id": 989444,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345940502665.184, "dur": 39.884, + "args": { + "External id": 989445,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 7428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940502672.895, "dur": 1.187, + "args": { + "External id": 989446,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940502677.085, "dur": 0.596, + "args": { + "External id": 989447,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940502679.380, "dur": 0.408, + "args": { + "External id": 989448,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940502684.215, "dur": 0.460, + "args": { + "External id": 989449,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940502685.788, "dur": 2.795, + "args": { + "External id": 989450,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940502690.108, "dur": 2.557, + "args": { + "External id": 989451,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940502694.089, "dur": 0.382, + "args": { + "External id": 989452,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940502696.020, "dur": 0.330, + "args": { + "External id": 989453,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940502699.971, "dur": 0.426, + "args": { + "External id": 989454,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940502718.655, "dur": 51.845, + "args": { + "External id": 989455,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 7438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345940502813.509, "dur": 138.171, + "args": { + "External id": 989456,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 7439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940502827.059, "dur": 4.720, + "args": { + "External id": 989457,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345940502837.333, "dur": 13.924, + "args": { + "External id": 989458,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 7441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345940502844.479, "dur": 6.300, + "args": { + "External id": 989459,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 7442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940502848.182, "dur": 0.631, + "args": { + "External id": 989460,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 7443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345940502858.768, "dur": 30.822, + "args": { + "External id": 989461,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 7444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940502860.798, "dur": 0.680, + "args": { + "External id": 989462,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940502863.248, "dur": 3.230, + "args": { + "External id": 989463,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940502869.704, "dur": 0.626, + "args": { + "External id": 989464,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940502871.732, "dur": 0.660, + "args": { + "External id": 989465,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940502873.387, "dur": 0.462, + "args": { + "External id": 989466,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940502876.735, "dur": 0.404, + "args": { + "External id": 989467,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940502878.459, "dur": 0.661, + "args": { + "External id": 989468,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940502880.558, "dur": 2.235, + "args": { + "External id": 989469,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940502884.064, "dur": 0.278, + "args": { + "External id": 989470,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940502902.506, "dur": 37.816, + "args": { + "External id": 989471,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 7454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345940503038.773, "dur": 408.091, + "args": { + "External id": 989472,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 7455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345940503132.786, "dur": 307.598, + "args": { + "External id": 989473,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7456, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345940503148.179, "dur": 285.546, + "args": { + "External id": 989474,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 7457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345940503475.791, "dur": 2.316, + "args": { + "External id": 989475,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7458, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940503607.598, "dur": 2082.956, + "args": { + "External id": 989476,"Sequence number": 10552523, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7459 + } + }, + { + "ph": "f", "id": 396, "pid": 2338710, "tid": 2379450, "ts": 6345940503607.598, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940503749.344, "dur": 129.580, + "args": { + "External id": 989477,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 7460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345940503930.078, "dur": 48.830, + "args": { + "External id": 989478,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 7461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345940504001.739, "dur": 134.309, + "args": { + "External id": 989479,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 7462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940504152.967, "dur": 40.426, + "args": { + "External id": 989480,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 7463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940504200.565, "dur": 46.793, + "args": { + "External id": 989481,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 7464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940504255.551, "dur": 31.725, + "args": { + "External id": 989482,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 7465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940504297.974, "dur": 34.388, + "args": { + "External id": 989483,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 7466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345940504367.383, "dur": 31.730, + "args": { + "External id": 989484,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 7467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345940504421.756, "dur": 36.438, + "args": { + "External id": 989485,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345940504485.941, "dur": 23.903, + "args": { + "External id": 989486,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 7469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345940504528.057, "dur": 18.399, + "args": { + "External id": 989487,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 7470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940504557.724, "dur": 43.363, + "args": { + "External id": 989488,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 7471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940504605.243, "dur": 35.976, + "args": { + "External id": 989489,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 7472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345940504675.208, "dur": 283.285, + "args": { + "External id": 989490,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 7473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940504772.012, "dur": 7.681, + "args": { + "External id": 989491,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940504781.943, "dur": 2.564, + "args": { + "External id": 989492,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940504785.904, "dur": 2.476, + "args": { + "External id": 989493,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940504790.597, "dur": 6.435, + "args": { + "External id": 989494,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940504844.399, "dur": 7.955, + "args": { + "External id": 989495,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940504848.742, "dur": 3.358, + "args": { + "External id": 989496,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345940504854.223, "dur": 34.945, + "args": { + "External id": 989497,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940504860.366, "dur": 2.202, + "args": { + "External id": 989498,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940504890.817, "dur": 2.228, + "args": { + "External id": 989499,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940504892.214, "dur": 0.741, + "args": { + "External id": 989500,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345940504894.108, "dur": 16.526, + "args": { + "External id": 989501,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940504896.548, "dur": 0.739, + "args": { + "External id": 989502,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345940504999.479, "dur": 97.481, + "args": { + "External id": 989503,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345940505125.959, "dur": 23.083, + "args": { + "External id": 989504,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940505159.569, "dur": 59.010, + "args": { + "External id": 989505,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 7488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940505227.065, "dur": 46.569, + "args": { + "External id": 989506,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 7489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940505285.604, "dur": 23.725, + "args": { + "External id": 989507,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 7490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940505315.251, "dur": 35.461, + "args": { + "External id": 989508,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 7491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940505357.885, "dur": 30.738, + "args": { + "External id": 989509,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 7492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940505395.505, "dur": 32.973, + "args": { + "External id": 989510,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 7493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345940505455.015, "dur": 27.652, + "args": { + "External id": 989511,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 7494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345940505503.783, "dur": 31.988, + "args": { + "External id": 989512,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345940505554.789, "dur": 21.070, + "args": { + "External id": 989513,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 7496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345940505597.603, "dur": 16.118, + "args": { + "External id": 989514,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 7497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345940505631.861, "dur": 20.984, + "args": { + "External id": 989515,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 7498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940505741.664, "dur": 17.099, + "args": { + "External id": 989516,"Record function id": 0, "Ev Idx": 7499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940505745.230, "dur": 12.541, + "args": { + "External id": 989517,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940505750.414, "dur": 6.218, + "args": { + "External id": 989518,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940505752.181, "dur": 4.340, + "args": { + "External id": 989519,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940505763.488, "dur": 5.468, + "args": { + "External id": 989520,"Record function id": 0, "Ev Idx": 7503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940505765.104, "dur": 3.348, + "args": { + "External id": 989521,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940505766.044, "dur": 1.792, + "args": { + "External id": 989522,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940505766.751, "dur": 0.973, + "args": { + "External id": 989523,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940505772.951, "dur": 4.518, + "args": { + "External id": 989524,"Record function id": 0, "Ev Idx": 7507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940505774.353, "dur": 2.588, + "args": { + "External id": 989525,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940505774.973, "dur": 1.509, + "args": { + "External id": 989526,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940505775.613, "dur": 0.751, + "args": { + "External id": 989527,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940505781.329, "dur": 7.051, + "args": { + "External id": 989528,"Record function id": 0, "Ev Idx": 7511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940505782.510, "dur": 5.366, + "args": { + "External id": 989529,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940505783.569, "dur": 3.849, + "args": { + "External id": 989530,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940505784.077, "dur": 3.211, + "args": { + "External id": 989531,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940505792.176, "dur": 4.548, + "args": { + "External id": 989532,"Record function id": 0, "Ev Idx": 7515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940505793.728, "dur": 2.480, + "args": { + "External id": 989533,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940505794.294, "dur": 1.461, + "args": { + "External id": 989534,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940505794.783, "dur": 0.871, + "args": { + "External id": 989535,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940505800.401, "dur": 4.367, + "args": { + "External id": 989536,"Record function id": 0, "Ev Idx": 7519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940505801.866, "dur": 2.406, + "args": { + "External id": 989537,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940505802.410, "dur": 1.396, + "args": { + "External id": 989538,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940505802.911, "dur": 0.783, + "args": { + "External id": 989539,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940505808.567, "dur": 4.071, + "args": { + "External id": 989540,"Record function id": 0, "Ev Idx": 7523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940505809.877, "dur": 2.240, + "args": { + "External id": 989541,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940505810.479, "dur": 1.172, + "args": { + "External id": 989542,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940505810.814, "dur": 0.713, + "args": { + "External id": 989543,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940505816.267, "dur": 6.355, + "args": { + "External id": 989544,"Record function id": 0, "Ev Idx": 7527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940505817.418, "dur": 4.666, + "args": { + "External id": 989545,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940505817.976, "dur": 3.488, + "args": { + "External id": 989546,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940505820.569, "dur": 0.777, + "args": { + "External id": 989547,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940505826.257, "dur": 4.567, + "args": { + "External id": 989548,"Record function id": 0, "Ev Idx": 7531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940505827.737, "dur": 2.577, + "args": { + "External id": 989549,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940505828.300, "dur": 1.385, + "args": { + "External id": 989550,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940505828.613, "dur": 0.927, + "args": { + "External id": 989551,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940505835.330, "dur": 280409.795, + "args": { + "External id": 989552,"Record function id": 0, "Sequence number": 10552522, "Fwd thread id": 1, "Ev Idx": 7535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940505836.733, "dur": 280396.637, + "args": { + "External id": 989553,"Sequence number": 10552522, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7536 + } + }, + { + "ph": "f", "id": 397, "pid": 2338710, "tid": 2379450, "ts": 6345940505836.733, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.25)", "pid": 2338710, "tid": 2379450, + "ts": 6345940505874.660, "dur": 47.525, + "args": { + "External id": 989554,"Record function id": 0, "Ev Idx": 7537 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.25)", "pid": 2338710, "tid": 2379450, + "ts": 6345940505931.960, "dur": 98.152, + "args": { + "External id": 989555,"Record function id": 0, "Ev Idx": 7538 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.25)", "pid": 2338710, "tid": 2379450, + "ts": 6345940506039.714, "dur": 280183.533, + "args": { + "External id": 989556,"Record function id": 0, "Ev Idx": 7539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940506146.825, "dur": 10.754, + "args": { + "External id": 989557,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940506173.016, "dur": 6.156, + "args": { + "External id": 989558,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 7541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345940506200.527, "dur": 278847.119, + "args": { + "External id": 989559,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 7542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345940506218.939, "dur": 278812.699, + "args": { + "External id": 989560,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 7543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940506397.382, "dur": 8.996, + "args": { + "External id": 989561,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345940506438.589, "dur": 278526.029, + "args": { + "External id": 989562,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 7545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345940506442.877, "dur": 278520.555, + "args": { + "External id": 989563,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 7546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940506449.264, "dur": 9.992, + "args": { + "External id": 989564,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940506461.677, "dur": 278495.487, + "args": { + "External id": 989565,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 7548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940785212.437, "dur": 16.520, + "args": { + "External id": 989566,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 7549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940785216.700, "dur": 11.704, + "args": { + "External id": 989567,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345940785273.160, "dur": 410.369, + "args": { + "External id": 989568,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 7551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345940785312.984, "dur": 364.629, + "args": { + "External id": 989569,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7552, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345940785332.725, "dur": 338.891, + "args": { + "External id": 989570,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 7553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345940785709.139, "dur": 2.738, + "args": { + "External id": 989571,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7554, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940785783.703, "dur": 7.794, + "args": { + "External id": 989572,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345940785806.351, "dur": 42.527, + "args": { + "External id": 989573,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 7556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940785861.764, "dur": 1.997, + "args": { + "External id": 989574,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345940785870.092, "dur": 15.121, + "args": { + "External id": 989575,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 7558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940785892.183, "dur": 1.317, + "args": { + "External id": 989576,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345940785898.523, "dur": 14.404, + "args": { + "External id": 989577,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 7560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940785920.640, "dur": 1.170, + "args": { + "External id": 989578,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345940785926.560, "dur": 12.107, + "args": { + "External id": 989579,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 7562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940785944.413, "dur": 1.237, + "args": { + "External id": 989580,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345940785949.702, "dur": 12.714, + "args": { + "External id": 989581,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 7564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940785967.277, "dur": 1.532, + "args": { + "External id": 989582,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345940785974.317, "dur": 14.171, + "args": { + "External id": 989583,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 7566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940785997.039, "dur": 3.609, + "args": { + "External id": 989584,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345940786004.994, "dur": 36.748, + "args": { + "External id": 989585,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 7568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940786086.765, "dur": 2.369, + "args": { + "External id": 989586,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345940786097.227, "dur": 18.446, + "args": { + "External id": 989587,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 7570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940786122.839, "dur": 0.891, + "args": { + "External id": 989588,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345940786128.313, "dur": 16.619, + "args": { + "External id": 989589,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 7572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940786266.044, "dur": 3423.526, + "args": { + "External id": 989590,"Record function id": 0, "Ev Idx": 7573 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.24)", "pid": 2338710, "tid": 2379450, + "ts": 6345940786290.492, "dur": 1294.982, + "args": { + "External id": 989591,"Record function id": 0, "Ev Idx": 7574 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.24)", "pid": 2338710, "tid": 2379450, + "ts": 6345940786311.656, "dur": 398.984, + "args": { + "External id": 989592,"Record function id": 0, "Ev Idx": 7575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940786411.744, "dur": 5.941, + "args": { + "External id": 989593,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 7576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940786421.808, "dur": 1.087, + "args": { + "External id": 989594,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 7577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940786425.209, "dur": 0.895, + "args": { + "External id": 989595,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 7578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940786428.594, "dur": 0.873, + "args": { + "External id": 989596,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940786431.048, "dur": 0.949, + "args": { + "External id": 989597,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940786433.888, "dur": 3.896, + "args": { + "External id": 989598,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 7581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940786439.586, "dur": 0.894, + "args": { + "External id": 989599,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 7582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940786452.406, "dur": 1.169, + "args": { + "External id": 989600,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 7583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940786456.276, "dur": 0.920, + "args": { + "External id": 989601,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 7584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940786458.590, "dur": 1.187, + "args": { + "External id": 989602,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 7585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345940786480.999, "dur": 191.577, + "args": { + "External id": 989603,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 7586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345940786501.884, "dur": 164.638, + "args": { + "External id": 989604,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 7587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940786526.714, "dur": 19.392, + "args": { + "External id": 989605,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345940786551.708, "dur": 81.146, + "args": { + "External id": 989606,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 7589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345940786554.908, "dur": 77.553, + "args": { + "External id": 989607,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 7590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940786560.541, "dur": 7.911, + "args": { + "External id": 989608,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940786570.399, "dur": 61.373, + "args": { + "External id": 989609,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 7592 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.23", "pid": 2338710, "tid": 2379450, + "ts": 6345940786800.191, "dur": 775.587, + "args": { + "External id": 989610,"Record function id": 0, "Ev Idx": 7593 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.23)", "pid": 2338710, "tid": 2379450, + "ts": 6345940786817.556, "dur": 743.572, + "args": { + "External id": 989611,"Record function id": 0, "Ev Idx": 7594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940786881.958, "dur": 7.189, + "args": { + "External id": 989612,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345940786909.123, "dur": 44.280, + "args": { + "External id": 989613,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 7596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940786915.807, "dur": 5.765, + "args": { + "External id": 989614,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940786923.821, "dur": 3.319, + "args": { + "External id": 989615,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940786929.093, "dur": 0.492, + "args": { + "External id": 989616,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940786931.146, "dur": 0.611, + "args": { + "External id": 989617,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940786935.465, "dur": 0.255, + "args": { + "External id": 989618,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940786940.030, "dur": 0.379, + "args": { + "External id": 989619,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940786942.170, "dur": 0.286, + "args": { + "External id": 989620,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940786944.923, "dur": 0.367, + "args": { + "External id": 989621,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940786947.050, "dur": 2.695, + "args": { + "External id": 989622,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940786965.554, "dur": 76.512, + "args": { + "External id": 989623,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 7606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345940787129.012, "dur": 148.059, + "args": { + "External id": 989624,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 7607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940787144.017, "dur": 8.178, + "args": { + "External id": 989625,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345940787159.382, "dur": 14.894, + "args": { + "External id": 989626,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 7609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345940787165.492, "dur": 8.316, + "args": { + "External id": 989627,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 7610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940787170.866, "dur": 0.678, + "args": { + "External id": 989628,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 7611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345940787183.868, "dur": 29.878, + "args": { + "External id": 989629,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 7612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940787186.575, "dur": 0.540, + "args": { + "External id": 989630,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940787189.297, "dur": 0.600, + "args": { + "External id": 989631,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940787191.361, "dur": 0.512, + "args": { + "External id": 989632,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940787193.874, "dur": 2.543, + "args": { + "External id": 989633,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940787197.836, "dur": 0.541, + "args": { + "External id": 989634,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940787199.695, "dur": 2.690, + "args": { + "External id": 989635,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940787205.602, "dur": 0.385, + "args": { + "External id": 989636,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940787207.391, "dur": 0.299, + "args": { + "External id": 989637,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940787209.347, "dur": 0.370, + "args": { + "External id": 989638,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940787227.724, "dur": 40.553, + "args": { + "External id": 989639,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 7622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345940787333.589, "dur": 144.205, + "args": { + "External id": 989640,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 7623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345940787370.412, "dur": 103.233, + "args": { + "External id": 989641,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7624, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345940787382.052, "dur": 86.703, + "args": { + "External id": 989642,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 7625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345940787497.637, "dur": 2.458, + "args": { + "External id": 989643,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7626, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940787593.091, "dur": 2072.212, + "args": { + "External id": 989644,"Sequence number": 10552521, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7627 + } + }, + { + "ph": "f", "id": 398, "pid": 2338710, "tid": 2379450, "ts": 6345940787593.091, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940787717.686, "dur": 125.180, + "args": { + "External id": 989645,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 7628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345940787893.173, "dur": 44.428, + "args": { + "External id": 989646,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 7629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345940787956.770, "dur": 83.716, + "args": { + "External id": 989647,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 7630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940788099.457, "dur": 47.107, + "args": { + "External id": 989648,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 7631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940788155.665, "dur": 38.508, + "args": { + "External id": 989649,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 7632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940788204.281, "dur": 30.724, + "args": { + "External id": 989650,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 7633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940788243.720, "dur": 32.271, + "args": { + "External id": 989651,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 7634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345940788309.924, "dur": 33.545, + "args": { + "External id": 989652,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 7635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345940788367.441, "dur": 35.770, + "args": { + "External id": 989653,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345940788427.213, "dur": 22.907, + "args": { + "External id": 989654,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 7637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345940788471.221, "dur": 20.108, + "args": { + "External id": 989655,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 7638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940788499.490, "dur": 42.991, + "args": { + "External id": 989656,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 7639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940788546.438, "dur": 38.128, + "args": { + "External id": 989657,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 7640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345940788617.024, "dur": 311.768, + "args": { + "External id": 989658,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 7641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940788708.625, "dur": 9.610, + "args": { + "External id": 989659,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940788729.679, "dur": 7.980, + "args": { + "External id": 989660,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940788741.375, "dur": 2.722, + "args": { + "External id": 989661,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940788745.575, "dur": 2.794, + "args": { + "External id": 989662,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940788798.775, "dur": 6.168, + "args": { + "External id": 989663,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940788801.187, "dur": 3.565, + "args": { + "External id": 989664,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345940788806.905, "dur": 39.108, + "args": { + "External id": 989665,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940788813.377, "dur": 3.792, + "args": { + "External id": 989666,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940788847.773, "dur": 1.976, + "args": { + "External id": 989667,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940788848.908, "dur": 0.694, + "args": { + "External id": 989668,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345940788850.590, "dur": 22.235, + "args": { + "External id": 989669,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940788853.470, "dur": 0.767, + "args": { + "External id": 989670,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345940788976.047, "dur": 55.843, + "args": { + "External id": 989671,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345940789093.626, "dur": 27.235, + "args": { + "External id": 989672,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940789133.310, "dur": 59.526, + "args": { + "External id": 989673,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 7656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940789203.061, "dur": 47.405, + "args": { + "External id": 989674,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 7657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940789260.070, "dur": 23.046, + "args": { + "External id": 989675,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 7658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940789289.450, "dur": 36.388, + "args": { + "External id": 989676,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 7659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940789333.893, "dur": 31.260, + "args": { + "External id": 989677,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 7660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940789374.477, "dur": 33.450, + "args": { + "External id": 989678,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 7661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345940789432.899, "dur": 32.011, + "args": { + "External id": 989679,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 7662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345940789484.145, "dur": 28.125, + "args": { + "External id": 989680,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345940789530.187, "dur": 21.387, + "args": { + "External id": 989681,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 7664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345940789570.664, "dur": 15.204, + "args": { + "External id": 989682,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 7665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345940789610.207, "dur": 19.783, + "args": { + "External id": 989683,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 7666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940789716.227, "dur": 16.784, + "args": { + "External id": 989684,"Record function id": 0, "Ev Idx": 7667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940789719.780, "dur": 12.165, + "args": { + "External id": 989685,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940789724.603, "dur": 6.284, + "args": { + "External id": 989686,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940789726.253, "dur": 4.489, + "args": { + "External id": 989687,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940789737.610, "dur": 5.096, + "args": { + "External id": 989688,"Record function id": 0, "Ev Idx": 7671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940789739.029, "dur": 3.162, + "args": { + "External id": 989689,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940789739.811, "dur": 1.847, + "args": { + "External id": 989690,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940789740.585, "dur": 0.920, + "args": { + "External id": 989691,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940789746.706, "dur": 7.105, + "args": { + "External id": 989692,"Record function id": 0, "Ev Idx": 7675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940789747.745, "dur": 5.553, + "args": { + "External id": 989693,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940789748.443, "dur": 4.363, + "args": { + "External id": 989694,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940789749.180, "dur": 3.518, + "args": { + "External id": 989695,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940789757.670, "dur": 45.415, + "args": { + "External id": 989696,"Record function id": 0, "Ev Idx": 7679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940789799.670, "dur": 2.864, + "args": { + "External id": 989697,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940789800.625, "dur": 1.313, + "args": { + "External id": 989698,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940789801.014, "dur": 0.788, + "args": { + "External id": 989699,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940789807.036, "dur": 5.200, + "args": { + "External id": 989700,"Record function id": 0, "Ev Idx": 7683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940789808.972, "dur": 2.777, + "args": { + "External id": 989701,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940789809.743, "dur": 1.224, + "args": { + "External id": 989702,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940789810.158, "dur": 0.697, + "args": { + "External id": 989703,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940789815.953, "dur": 7.392, + "args": { + "External id": 989704,"Record function id": 0, "Ev Idx": 7687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940789817.229, "dur": 5.618, + "args": { + "External id": 989705,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940789818.126, "dur": 4.227, + "args": { + "External id": 989706,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940789821.505, "dur": 0.744, + "args": { + "External id": 989707,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940789827.532, "dur": 4.570, + "args": { + "External id": 989708,"Record function id": 0, "Ev Idx": 7691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940789828.827, "dur": 2.640, + "args": { + "External id": 989709,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940789829.579, "dur": 1.453, + "args": { + "External id": 989710,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940789829.947, "dur": 0.985, + "args": { + "External id": 989711,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940789836.098, "dur": 4.161, + "args": { + "External id": 989712,"Record function id": 0, "Ev Idx": 7695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940789837.481, "dur": 2.257, + "args": { + "External id": 989713,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940789838.039, "dur": 1.206, + "args": { + "External id": 989714,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940789838.377, "dur": 0.781, + "args": { + "External id": 989715,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940789844.473, "dur": 4.087, + "args": { + "External id": 989716,"Record function id": 0, "Ev Idx": 7699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940789845.711, "dur": 2.343, + "args": { + "External id": 989717,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940789846.313, "dur": 1.300, + "args": { + "External id": 989718,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940789846.791, "dur": 0.735, + "args": { + "External id": 989719,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940789853.127, "dur": 79772.586, + "args": { + "External id": 989720,"Record function id": 0, "Sequence number": 10552520, "Fwd thread id": 1, "Ev Idx": 7703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940789854.605, "dur": 79761.127, + "args": { + "External id": 989721,"Sequence number": 10552520, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7704 + } + }, + { + "ph": "f", "id": 399, "pid": 2338710, "tid": 2379450, "ts": 6345940789854.605, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.24)", "pid": 2338710, "tid": 2379450, + "ts": 6345940789889.313, "dur": 45.444, + "args": { + "External id": 989722,"Record function id": 0, "Ev Idx": 7705 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.24)", "pid": 2338710, "tid": 2379450, + "ts": 6345940789944.433, "dur": 94.151, + "args": { + "External id": 989723,"Record function id": 0, "Ev Idx": 7706 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.24)", "pid": 2338710, "tid": 2379450, + "ts": 6345940790047.802, "dur": 79557.707, + "args": { + "External id": 989724,"Record function id": 0, "Ev Idx": 7707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940790196.631, "dur": 10.492, + "args": { + "External id": 989725,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940790219.264, "dur": 6.036, + "args": { + "External id": 989726,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 7709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345940790245.866, "dur": 78167.798, + "args": { + "External id": 989727,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 7710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345940790262.633, "dur": 78135.790, + "args": { + "External id": 989728,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 7711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940790417.658, "dur": 24.027, + "args": { + "External id": 989729,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345940790481.142, "dur": 77869.163, + "args": { + "External id": 989730,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 7713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345940790484.862, "dur": 77864.230, + "args": { + "External id": 989731,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 7714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940790491.947, "dur": 14.911, + "args": { + "External id": 989732,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940790509.557, "dur": 77836.268, + "args": { + "External id": 989733,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 7716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940868537.953, "dur": 15.825, + "args": { + "External id": 989734,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 7717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940868543.136, "dur": 10.127, + "args": { + "External id": 989735,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345940868595.987, "dur": 493.943, + "args": { + "External id": 989736,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 7719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345940868640.154, "dur": 409.331, + "args": { + "External id": 989737,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7720, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345940868659.325, "dur": 382.129, + "args": { + "External id": 989738,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 7721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345940869125.720, "dur": 3.426, + "args": { + "External id": 989739,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7722, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940869207.664, "dur": 8.673, + "args": { + "External id": 989740,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345940869234.704, "dur": 47.546, + "args": { + "External id": 989741,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 7724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940869294.816, "dur": 3.305, + "args": { + "External id": 989742,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345940869304.648, "dur": 17.007, + "args": { + "External id": 989743,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 7726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940869328.608, "dur": 1.292, + "args": { + "External id": 989744,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345940869338.003, "dur": 15.578, + "args": { + "External id": 989745,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 7728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940869359.325, "dur": 1.094, + "args": { + "External id": 989746,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345940869367.266, "dur": 14.386, + "args": { + "External id": 989747,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 7730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940869387.135, "dur": 1.014, + "args": { + "External id": 989748,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345940869393.735, "dur": 16.465, + "args": { + "External id": 989749,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 7732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940869415.334, "dur": 1.612, + "args": { + "External id": 989750,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345940869421.049, "dur": 14.030, + "args": { + "External id": 989751,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 7734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940869443.162, "dur": 4.257, + "args": { + "External id": 989752,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345940869459.628, "dur": 17.280, + "args": { + "External id": 989753,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 7736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940869485.168, "dur": 1.049, + "args": { + "External id": 989754,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345940869491.993, "dur": 14.242, + "args": { + "External id": 989755,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 7738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940869511.499, "dur": 0.946, + "args": { + "External id": 989756,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345940869519.609, "dur": 15.440, + "args": { + "External id": 989757,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 7740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940869645.162, "dur": 3339.277, + "args": { + "External id": 989758,"Record function id": 0, "Ev Idx": 7741 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.23)", "pid": 2338710, "tid": 2379450, + "ts": 6345940869669.925, "dur": 1227.601, + "args": { + "External id": 989759,"Record function id": 0, "Ev Idx": 7742 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.23)", "pid": 2338710, "tid": 2379450, + "ts": 6345940869689.728, "dur": 428.850, + "args": { + "External id": 989760,"Record function id": 0, "Ev Idx": 7743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940869784.805, "dur": 7.158, + "args": { + "External id": 989761,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 7744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940869795.603, "dur": 0.987, + "args": { + "External id": 989762,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 7745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940869799.139, "dur": 1.060, + "args": { + "External id": 989763,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 7746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940869802.514, "dur": 1.006, + "args": { + "External id": 989764,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940869805.218, "dur": 0.965, + "args": { + "External id": 989765,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940869807.984, "dur": 3.853, + "args": { + "External id": 989766,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 7749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940869813.949, "dur": 0.954, + "args": { + "External id": 989767,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 7750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940869818.660, "dur": 1.327, + "args": { + "External id": 989768,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 7751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940869822.034, "dur": 1.433, + "args": { + "External id": 989769,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 7752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940869825.120, "dur": 0.974, + "args": { + "External id": 989770,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 7753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345940869848.009, "dur": 192.150, + "args": { + "External id": 989771,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 7754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345940869867.199, "dur": 165.204, + "args": { + "External id": 989772,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 7755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940869886.665, "dur": 17.586, + "args": { + "External id": 989773,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345940869909.550, "dur": 71.128, + "args": { + "External id": 989774,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 7757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345940869912.439, "dur": 67.864, + "args": { + "External id": 989775,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 7758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940869917.171, "dur": 6.663, + "args": { + "External id": 989776,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940869925.574, "dur": 54.057, + "args": { + "External id": 989777,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 7760 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.22", "pid": 2338710, "tid": 2379450, + "ts": 6345940870217.812, "dur": 670.807, + "args": { + "External id": 989778,"Record function id": 0, "Ev Idx": 7761 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.22)", "pid": 2338710, "tid": 2379450, + "ts": 6345940870236.493, "dur": 638.822, + "args": { + "External id": 989779,"Record function id": 0, "Ev Idx": 7762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940870301.356, "dur": 8.375, + "args": { + "External id": 989780,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345940870327.464, "dur": 38.529, + "args": { + "External id": 989781,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 7764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940870333.587, "dur": 5.796, + "args": { + "External id": 989782,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940870341.471, "dur": 0.319, + "args": { + "External id": 989783,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940870343.285, "dur": 0.495, + "args": { + "External id": 989784,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940870347.090, "dur": 0.297, + "args": { + "External id": 989785,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940870349.127, "dur": 0.488, + "args": { + "External id": 989786,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940870351.224, "dur": 0.404, + "args": { + "External id": 989787,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940870355.805, "dur": 0.499, + "args": { + "External id": 989788,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940870357.394, "dur": 0.525, + "args": { + "External id": 989789,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940870359.350, "dur": 2.948, + "args": { + "External id": 989790,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940870377.985, "dur": 52.072, + "args": { + "External id": 989791,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 7774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345940870468.009, "dur": 130.263, + "args": { + "External id": 989792,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 7775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940870480.718, "dur": 4.167, + "args": { + "External id": 989793,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345940870490.778, "dur": 11.873, + "args": { + "External id": 989794,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 7777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345940870495.876, "dur": 6.282, + "args": { + "External id": 989795,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 7778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940870499.966, "dur": 0.696, + "args": { + "External id": 989796,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 7779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345940870510.674, "dur": 27.701, + "args": { + "External id": 989797,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 7780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940870513.155, "dur": 0.508, + "args": { + "External id": 989798,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940870515.533, "dur": 0.337, + "args": { + "External id": 989799,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940870517.065, "dur": 2.033, + "args": { + "External id": 989800,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940870520.678, "dur": 0.541, + "args": { + "External id": 989801,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940870522.445, "dur": 0.469, + "args": { + "External id": 989802,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940870525.753, "dur": 2.777, + "args": { + "External id": 989803,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940870529.770, "dur": 0.266, + "args": { + "External id": 989804,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940870531.525, "dur": 0.604, + "args": { + "External id": 989805,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940870534.174, "dur": 0.571, + "args": { + "External id": 989806,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940870553.245, "dur": 36.545, + "args": { + "External id": 989807,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 7790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345940870650.753, "dur": 145.074, + "args": { + "External id": 989808,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 7791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345940870685.416, "dur": 106.114, + "args": { + "External id": 989809,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7792, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345940870698.725, "dur": 86.721, + "args": { + "External id": 989810,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 7793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345940870815.995, "dur": 2.281, + "args": { + "External id": 989811,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7794, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940870906.355, "dur": 2051.536, + "args": { + "External id": 989812,"Sequence number": 10552519, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7795 + } + }, + { + "ph": "f", "id": 400, "pid": 2338710, "tid": 2379450, "ts": 6345940870906.355, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940871050.197, "dur": 171.703, + "args": { + "External id": 989813,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 7796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345940871281.564, "dur": 49.105, + "args": { + "External id": 989814,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 7797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345940871351.870, "dur": 63.542, + "args": { + "External id": 989815,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 7798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940871427.856, "dur": 36.605, + "args": { + "External id": 989816,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 7799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940871472.002, "dur": 36.244, + "args": { + "External id": 989817,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 7800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940871519.160, "dur": 32.828, + "args": { + "External id": 989818,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 7801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940871560.365, "dur": 32.965, + "args": { + "External id": 989819,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 7802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345940871621.930, "dur": 30.261, + "args": { + "External id": 989820,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 7803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345940871673.780, "dur": 33.420, + "args": { + "External id": 989821,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345940871732.356, "dur": 21.561, + "args": { + "External id": 989822,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 7805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345940871774.707, "dur": 16.160, + "args": { + "External id": 989823,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 7806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940871799.034, "dur": 39.361, + "args": { + "External id": 989824,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 7807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940871842.411, "dur": 36.467, + "args": { + "External id": 989825,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 7808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345940871911.249, "dur": 400.334, + "args": { + "External id": 989826,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 7809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940872043.090, "dur": 54.476, + "args": { + "External id": 989827,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940872103.948, "dur": 3.658, + "args": { + "External id": 989828,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940872109.086, "dur": 3.167, + "args": { + "External id": 989829,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940872113.623, "dur": 2.862, + "args": { + "External id": 989830,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940872174.005, "dur": 13.988, + "args": { + "External id": 989831,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940872184.270, "dur": 3.483, + "args": { + "External id": 989832,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345940872190.408, "dur": 36.259, + "args": { + "External id": 989833,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940872197.593, "dur": 2.117, + "args": { + "External id": 989834,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940872228.290, "dur": 2.250, + "args": { + "External id": 989835,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940872229.655, "dur": 0.754, + "args": { + "External id": 989836,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345940872232.307, "dur": 19.927, + "args": { + "External id": 989837,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940872235.366, "dur": 0.822, + "args": { + "External id": 989838,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345940872356.628, "dur": 33.913, + "args": { + "External id": 989839,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345940872414.917, "dur": 18.715, + "args": { + "External id": 989840,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940872442.936, "dur": 57.768, + "args": { + "External id": 989841,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 7824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940872508.185, "dur": 46.800, + "args": { + "External id": 989842,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 7825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940872566.673, "dur": 24.106, + "args": { + "External id": 989843,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 7826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940872597.037, "dur": 34.174, + "args": { + "External id": 989844,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 7827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940872638.763, "dur": 30.815, + "args": { + "External id": 989845,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 7828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940872676.394, "dur": 33.872, + "args": { + "External id": 989846,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 7829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345940872735.315, "dur": 27.331, + "args": { + "External id": 989847,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 7830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345940872781.072, "dur": 27.321, + "args": { + "External id": 989848,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345940872828.492, "dur": 19.933, + "args": { + "External id": 989849,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 7832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345940872863.854, "dur": 18.758, + "args": { + "External id": 989850,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 7833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345940872900.252, "dur": 19.099, + "args": { + "External id": 989851,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 7834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940873028.239, "dur": 20.061, + "args": { + "External id": 989852,"Record function id": 0, "Ev Idx": 7835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940873033.133, "dur": 13.665, + "args": { + "External id": 989853,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940873038.326, "dur": 6.814, + "args": { + "External id": 989854,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940873040.187, "dur": 4.640, + "args": { + "External id": 989855,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940873094.839, "dur": 12.436, + "args": { + "External id": 989856,"Record function id": 0, "Ev Idx": 7839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940873100.267, "dur": 6.112, + "args": { + "External id": 989857,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940873102.219, "dur": 3.134, + "args": { + "External id": 989858,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940873103.370, "dur": 1.749, + "args": { + "External id": 989859,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940873111.919, "dur": 5.888, + "args": { + "External id": 989860,"Record function id": 0, "Ev Idx": 7843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940873113.237, "dur": 4.100, + "args": { + "External id": 989861,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940873114.246, "dur": 2.494, + "args": { + "External id": 989862,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940873115.770, "dur": 0.857, + "args": { + "External id": 989863,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940873121.775, "dur": 5.180, + "args": { + "External id": 989864,"Record function id": 0, "Ev Idx": 7847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940873123.695, "dur": 2.805, + "args": { + "External id": 989865,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940873124.567, "dur": 1.431, + "args": { + "External id": 989866,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940873124.947, "dur": 0.946, + "args": { + "External id": 989867,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940873130.627, "dur": 8.450, + "args": { + "External id": 989868,"Record function id": 0, "Ev Idx": 7851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940873132.686, "dur": 5.884, + "args": { + "External id": 989869,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940873133.328, "dur": 4.618, + "args": { + "External id": 989870,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940873134.004, "dur": 3.860, + "args": { + "External id": 989871,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940873142.686, "dur": 4.725, + "args": { + "External id": 989872,"Record function id": 0, "Ev Idx": 7855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940873144.109, "dur": 2.832, + "args": { + "External id": 989873,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940873144.706, "dur": 1.753, + "args": { + "External id": 989874,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940873145.399, "dur": 0.979, + "args": { + "External id": 989875,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940873151.120, "dur": 4.202, + "args": { + "External id": 989876,"Record function id": 0, "Ev Idx": 7859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940873152.313, "dur": 2.556, + "args": { + "External id": 989877,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940873152.895, "dur": 1.504, + "args": { + "External id": 989878,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940873153.517, "dur": 0.793, + "args": { + "External id": 989879,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940873158.923, "dur": 7.283, + "args": { + "External id": 989880,"Record function id": 0, "Ev Idx": 7863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940873160.418, "dur": 5.280, + "args": { + "External id": 989881,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940873161.173, "dur": 4.048, + "args": { + "External id": 989882,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940873164.293, "dur": 0.806, + "args": { + "External id": 989883,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940873169.911, "dur": 5.266, + "args": { + "External id": 989884,"Record function id": 0, "Ev Idx": 7867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940873171.208, "dur": 3.502, + "args": { + "External id": 989885,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940873172.025, "dur": 2.102, + "args": { + "External id": 989886,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940873173.312, "dur": 0.728, + "args": { + "External id": 989887,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940873179.795, "dur": 74209.242, + "args": { + "External id": 989888,"Record function id": 0, "Sequence number": 10552518, "Fwd thread id": 1, "Ev Idx": 7871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940873181.224, "dur": 74196.232, + "args": { + "External id": 989889,"Sequence number": 10552518, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7872 + } + }, + { + "ph": "f", "id": 401, "pid": 2338710, "tid": 2379450, "ts": 6345940873181.224, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.23)", "pid": 2338710, "tid": 2379450, + "ts": 6345940873217.420, "dur": 45.364, + "args": { + "External id": 989890,"Record function id": 0, "Ev Idx": 7873 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.23)", "pid": 2338710, "tid": 2379450, + "ts": 6345940873272.615, "dur": 74.060, + "args": { + "External id": 989891,"Record function id": 0, "Ev Idx": 7874 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.23)", "pid": 2338710, "tid": 2379450, + "ts": 6345940873353.570, "dur": 74012.904, + "args": { + "External id": 989892,"Record function id": 0, "Ev Idx": 7875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940873461.346, "dur": 8.605, + "args": { + "External id": 989893,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940873480.987, "dur": 5.307, + "args": { + "External id": 989894,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 7877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345940873502.781, "dur": 72675.162, + "args": { + "External id": 989895,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 7878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345940873521.054, "dur": 72640.055, + "args": { + "External id": 989896,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 7879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940873626.879, "dur": 22.396, + "args": { + "External id": 989897,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345940873674.350, "dur": 72428.451, + "args": { + "External id": 989898,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 7881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345940873681.270, "dur": 72420.373, + "args": { + "External id": 989899,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 7882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940873688.847, "dur": 9.842, + "args": { + "External id": 989900,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940873701.171, "dur": 72393.409, + "args": { + "External id": 989901,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 7884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940946321.314, "dur": 16.565, + "args": { + "External id": 989902,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 7885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940946326.326, "dur": 11.067, + "args": { + "External id": 989903,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345940946380.105, "dur": 435.233, + "args": { + "External id": 989904,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 7887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345940946423.466, "dur": 386.334, + "args": { + "External id": 989905,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7888, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345940946442.004, "dur": 361.040, + "args": { + "External id": 989906,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 7889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345940946841.510, "dur": 2.948, + "args": { + "External id": 989907,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7890, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940946912.224, "dur": 10.716, + "args": { + "External id": 989908,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345940946940.678, "dur": 44.552, + "args": { + "External id": 989909,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 7892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940946998.094, "dur": 3.126, + "args": { + "External id": 989910,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345940947006.716, "dur": 41.349, + "args": { + "External id": 989911,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 7894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940947094.472, "dur": 3.024, + "args": { + "External id": 989912,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345940947112.068, "dur": 22.502, + "args": { + "External id": 989913,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 7896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940947142.928, "dur": 1.071, + "args": { + "External id": 989914,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345940947149.960, "dur": 14.793, + "args": { + "External id": 989915,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 7898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940947170.116, "dur": 1.130, + "args": { + "External id": 989916,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345940947176.135, "dur": 14.970, + "args": { + "External id": 989917,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 7900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940947196.332, "dur": 3.405, + "args": { + "External id": 989918,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345940947204.305, "dur": 12.508, + "args": { + "External id": 989919,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 7902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940947224.292, "dur": 1.146, + "args": { + "External id": 989920,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345940947231.236, "dur": 13.582, + "args": { + "External id": 989921,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 7904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940947249.405, "dur": 0.990, + "args": { + "External id": 989922,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345940947254.953, "dur": 13.193, + "args": { + "External id": 989923,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 7906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940947272.716, "dur": 3.792, + "args": { + "External id": 989924,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345940947280.301, "dur": 12.919, + "args": { + "External id": 989925,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 7908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940947410.677, "dur": 3367.749, + "args": { + "External id": 989926,"Record function id": 0, "Ev Idx": 7909 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.22)", "pid": 2338710, "tid": 2379450, + "ts": 6345940947438.622, "dur": 1228.834, + "args": { + "External id": 989927,"Record function id": 0, "Ev Idx": 7910 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.22)", "pid": 2338710, "tid": 2379450, + "ts": 6345940947459.380, "dur": 363.215, + "args": { + "External id": 989928,"Record function id": 0, "Ev Idx": 7911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940947553.157, "dur": 5.068, + "args": { + "External id": 989929,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 7912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940947562.164, "dur": 0.931, + "args": { + "External id": 989930,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 7913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940947565.150, "dur": 1.183, + "args": { + "External id": 989931,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 7914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940947569.091, "dur": 0.928, + "args": { + "External id": 989932,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940947571.578, "dur": 1.265, + "args": { + "External id": 989933,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940947575.128, "dur": 1.217, + "args": { + "External id": 989934,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 7917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940947578.087, "dur": 0.823, + "args": { + "External id": 989935,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 7918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940947583.087, "dur": 3.817, + "args": { + "External id": 989936,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 7919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940947588.347, "dur": 1.133, + "args": { + "External id": 989937,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 7920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940947591.417, "dur": 1.121, + "args": { + "External id": 989938,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 7921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345940947612.948, "dur": 175.730, + "args": { + "External id": 989939,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 7922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345940947633.040, "dur": 150.541, + "args": { + "External id": 989940,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 7923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940947651.836, "dur": 17.815, + "args": { + "External id": 989941,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345940947677.345, "dur": 76.774, + "args": { + "External id": 989942,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 7925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345940947680.680, "dur": 73.060, + "args": { + "External id": 989943,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 7926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940947685.516, "dur": 6.904, + "args": { + "External id": 989944,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940947694.650, "dur": 58.395, + "args": { + "External id": 989945,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 7928 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.21", "pid": 2338710, "tid": 2379450, + "ts": 6345940947909.642, "dur": 748.026, + "args": { + "External id": 989946,"Record function id": 0, "Ev Idx": 7929 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.21)", "pid": 2338710, "tid": 2379450, + "ts": 6345940947928.355, "dur": 715.050, + "args": { + "External id": 989947,"Record function id": 0, "Ev Idx": 7930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940947989.939, "dur": 7.002, + "args": { + "External id": 989948,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345940948036.928, "dur": 77.375, + "args": { + "External id": 989949,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 7932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940948042.928, "dur": 4.085, + "args": { + "External id": 989950,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940948049.399, "dur": 0.433, + "args": { + "External id": 989951,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940948087.825, "dur": 4.094, + "args": { + "External id": 989952,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940948096.079, "dur": 0.602, + "args": { + "External id": 989953,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940948098.717, "dur": 0.364, + "args": { + "External id": 989954,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940948100.592, "dur": 0.321, + "args": { + "External id": 989955,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940948104.055, "dur": 0.675, + "args": { + "External id": 989956,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940948105.973, "dur": 0.636, + "args": { + "External id": 989957,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940948108.034, "dur": 1.964, + "args": { + "External id": 989958,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940948131.415, "dur": 54.895, + "args": { + "External id": 989959,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 7942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345940948227.396, "dur": 132.200, + "args": { + "External id": 989960,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 7943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940948241.691, "dur": 6.185, + "args": { + "External id": 989961,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345940948254.283, "dur": 14.173, + "args": { + "External id": 989962,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 7945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345940948259.138, "dur": 8.822, + "args": { + "External id": 989963,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 7946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940948263.239, "dur": 2.999, + "args": { + "External id": 989964,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 7947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345940948276.698, "dur": 29.253, + "args": { + "External id": 989965,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 7948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940948279.437, "dur": 0.685, + "args": { + "External id": 989966,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940948281.856, "dur": 1.388, + "args": { + "External id": 989967,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940948284.469, "dur": 0.472, + "args": { + "External id": 989968,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940948286.481, "dur": 0.500, + "args": { + "External id": 989969,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940948289.745, "dur": 0.371, + "args": { + "External id": 989970,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940948291.913, "dur": 0.492, + "args": { + "External id": 989971,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940948293.563, "dur": 0.557, + "args": { + "External id": 989972,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940948297.422, "dur": 2.926, + "args": { + "External id": 989973,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940948301.922, "dur": 0.496, + "args": { + "External id": 989974,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940948318.009, "dur": 33.174, + "args": { + "External id": 989975,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 7958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345940948414.069, "dur": 144.236, + "args": { + "External id": 989976,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 7959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345940948451.799, "dur": 102.536, + "args": { + "External id": 989977,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7960, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345940948462.743, "dur": 86.690, + "args": { + "External id": 989978,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 7961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345940948578.884, "dur": 2.326, + "args": { + "External id": 989979,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7962, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940948684.434, "dur": 2066.756, + "args": { + "External id": 989980,"Sequence number": 10552517, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7963 + } + }, + { + "ph": "f", "id": 402, "pid": 2338710, "tid": 2379450, "ts": 6345940948684.434, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940948809.201, "dur": 128.317, + "args": { + "External id": 989981,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 7964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345940948988.808, "dur": 110.895, + "args": { + "External id": 989982,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 7965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345940949127.436, "dur": 72.367, + "args": { + "External id": 989983,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 7966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940949212.216, "dur": 37.343, + "args": { + "External id": 989984,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 7967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940949256.730, "dur": 36.351, + "args": { + "External id": 989985,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 7968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940949300.977, "dur": 32.367, + "args": { + "External id": 989986,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 7969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940949343.197, "dur": 33.534, + "args": { + "External id": 989987,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 7970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345940949409.880, "dur": 28.646, + "args": { + "External id": 989988,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 7971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345940949463.058, "dur": 35.328, + "args": { + "External id": 989989,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345940949522.013, "dur": 23.064, + "args": { + "External id": 989990,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 7973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345940949564.943, "dur": 19.456, + "args": { + "External id": 989991,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 7974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940949594.780, "dur": 42.946, + "args": { + "External id": 989992,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 7975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940949641.780, "dur": 38.460, + "args": { + "External id": 989993,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 7976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345940949713.133, "dur": 381.848, + "args": { + "External id": 989994,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 7977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940949814.997, "dur": 10.648, + "args": { + "External id": 989995,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940949828.538, "dur": 3.318, + "args": { + "External id": 989996,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940949833.111, "dur": 5.171, + "args": { + "External id": 989997,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940949839.454, "dur": 3.264, + "args": { + "External id": 989998,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940949898.841, "dur": 5.574, + "args": { + "External id": 989999,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940949900.785, "dur": 3.356, + "args": { + "External id": 990000,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345940949906.744, "dur": 40.397, + "args": { + "External id": 990001,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940949912.755, "dur": 1.907, + "args": { + "External id": 990002,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345940949948.954, "dur": 2.047, + "args": { + "External id": 990003,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940949950.103, "dur": 0.769, + "args": { + "External id": 990004,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345940949954.907, "dur": 17.789, + "args": { + "External id": 990005,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940949957.004, "dur": 1.296, + "args": { + "External id": 990006,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345940950145.006, "dur": 36.315, + "args": { + "External id": 990007,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345940950202.006, "dur": 20.020, + "args": { + "External id": 990008,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940950231.730, "dur": 59.780, + "args": { + "External id": 990009,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 7992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940950299.412, "dur": 47.354, + "args": { + "External id": 990010,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 7993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940950358.603, "dur": 23.411, + "args": { + "External id": 990011,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 7994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940950388.002, "dur": 35.929, + "args": { + "External id": 990012,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 7995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940950431.205, "dur": 32.031, + "args": { + "External id": 990013,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 7996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345940950469.851, "dur": 34.258, + "args": { + "External id": 990014,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 7997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345940950529.099, "dur": 27.178, + "args": { + "External id": 990015,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 7998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345940950577.131, "dur": 29.105, + "args": { + "External id": 990016,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345940950625.032, "dur": 19.924, + "args": { + "External id": 990017,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345940950663.811, "dur": 16.139, + "args": { + "External id": 990018,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345940950694.234, "dur": 19.192, + "args": { + "External id": 990019,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 8002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940950804.400, "dur": 16.970, + "args": { + "External id": 990020,"Record function id": 0, "Ev Idx": 8003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940950808.282, "dur": 12.021, + "args": { + "External id": 990021,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940950812.970, "dur": 6.458, + "args": { + "External id": 990022,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940950814.767, "dur": 4.530, + "args": { + "External id": 990023,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940950826.004, "dur": 5.463, + "args": { + "External id": 990024,"Record function id": 0, "Ev Idx": 8007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940950827.486, "dur": 3.396, + "args": { + "External id": 990025,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940950828.445, "dur": 1.932, + "args": { + "External id": 990026,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940950829.220, "dur": 1.049, + "args": { + "External id": 990027,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940950835.429, "dur": 5.157, + "args": { + "External id": 990028,"Record function id": 0, "Ev Idx": 8011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940950837.120, "dur": 2.946, + "args": { + "External id": 990029,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940950837.969, "dur": 1.599, + "args": { + "External id": 990030,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940950838.804, "dur": 0.688, + "args": { + "External id": 990031,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940950844.516, "dur": 4.808, + "args": { + "External id": 990032,"Record function id": 0, "Ev Idx": 8015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940950846.170, "dur": 2.676, + "args": { + "External id": 990033,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940950847.023, "dur": 1.339, + "args": { + "External id": 990034,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940950847.386, "dur": 0.876, + "args": { + "External id": 990035,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940950852.951, "dur": 6.625, + "args": { + "External id": 990036,"Record function id": 0, "Ev Idx": 8019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940950854.317, "dur": 4.752, + "args": { + "External id": 990037,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940950854.867, "dur": 3.638, + "args": { + "External id": 990038,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940950855.182, "dur": 3.247, + "args": { + "External id": 990039,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940950863.318, "dur": 7.097, + "args": { + "External id": 990040,"Record function id": 0, "Ev Idx": 8023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940950864.620, "dur": 5.289, + "args": { + "External id": 990041,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940950865.354, "dur": 4.083, + "args": { + "External id": 990042,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940950868.548, "dur": 0.774, + "args": { + "External id": 990043,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940950874.327, "dur": 4.681, + "args": { + "External id": 990044,"Record function id": 0, "Ev Idx": 8027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940950875.643, "dur": 2.883, + "args": { + "External id": 990045,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940950876.441, "dur": 1.596, + "args": { + "External id": 990046,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940950877.192, "dur": 0.768, + "args": { + "External id": 990047,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940950882.634, "dur": 4.791, + "args": { + "External id": 990048,"Record function id": 0, "Ev Idx": 8031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940950883.984, "dur": 2.962, + "args": { + "External id": 990049,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940950884.564, "dur": 1.565, + "args": { + "External id": 990050,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940950885.299, "dur": 0.717, + "args": { + "External id": 990051,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940950891.053, "dur": 4.619, + "args": { + "External id": 990052,"Record function id": 0, "Ev Idx": 8035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345940950892.590, "dur": 2.587, + "args": { + "External id": 990053,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940950893.294, "dur": 1.370, + "args": { + "External id": 990054,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345940950893.804, "dur": 0.785, + "args": { + "External id": 990055,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940950900.351, "dur": 72433.561, + "args": { + "External id": 990056,"Record function id": 0, "Sequence number": 10552516, "Fwd thread id": 1, "Ev Idx": 8039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345940950901.843, "dur": 72421.223, + "args": { + "External id": 990057,"Sequence number": 10552516, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8040 + } + }, + { + "ph": "f", "id": 403, "pid": 2338710, "tid": 2379450, "ts": 6345940950901.843, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.22)", "pid": 2338710, "tid": 2379450, + "ts": 6345940950939.365, "dur": 44.301, + "args": { + "External id": 990058,"Record function id": 0, "Ev Idx": 8041 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.22)", "pid": 2338710, "tid": 2379450, + "ts": 6345940950992.993, "dur": 131.498, + "args": { + "External id": 990059,"Record function id": 0, "Ev Idx": 8042 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.22)", "pid": 2338710, "tid": 2379450, + "ts": 6345940951134.127, "dur": 72178.150, + "args": { + "External id": 990060,"Record function id": 0, "Ev Idx": 8043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940951246.699, "dur": 8.966, + "args": { + "External id": 990061,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345940951268.449, "dur": 5.946, + "args": { + "External id": 990062,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345940951295.058, "dur": 70841.680, + "args": { + "External id": 990063,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345940951311.181, "dur": 70808.574, + "args": { + "External id": 990064,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345940951414.263, "dur": 22.697, + "args": { + "External id": 990065,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345940951461.278, "dur": 70572.000, + "args": { + "External id": 990066,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 8049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345940951465.311, "dur": 70566.635, + "args": { + "External id": 990067,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 8050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345940951472.465, "dur": 9.337, + "args": { + "External id": 990068,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345940951487.559, "dur": 70537.156, + "args": { + "External id": 990069,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 8052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941022277.990, "dur": 16.693, + "args": { + "External id": 990070,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 8053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941022283.058, "dur": 11.102, + "args": { + "External id": 990071,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345941022339.104, "dur": 411.693, + "args": { + "External id": 990072,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 8055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941022382.070, "dur": 361.780, + "args": { + "External id": 990073,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8056, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345941022399.511, "dur": 336.692, + "args": { + "External id": 990074,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 8057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941022783.120, "dur": 2.709, + "args": { + "External id": 990075,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8058, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941022863.481, "dur": 10.439, + "args": { + "External id": 990076,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941022893.053, "dur": 41.973, + "args": { + "External id": 990077,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941022948.175, "dur": 2.535, + "args": { + "External id": 990078,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941022957.183, "dur": 15.208, + "args": { + "External id": 990079,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941022980.417, "dur": 1.023, + "args": { + "External id": 990080,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941022987.809, "dur": 14.488, + "args": { + "External id": 990081,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941023028.405, "dur": 3.002, + "args": { + "External id": 990082,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941023037.989, "dur": 57.117, + "args": { + "External id": 990083,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941023106.578, "dur": 2.904, + "args": { + "External id": 990084,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941023114.638, "dur": 17.662, + "args": { + "External id": 990085,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941023137.058, "dur": 1.682, + "args": { + "External id": 990086,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941023146.644, "dur": 15.083, + "args": { + "External id": 990087,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941023166.591, "dur": 1.253, + "args": { + "External id": 990088,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941023172.278, "dur": 13.551, + "args": { + "External id": 990089,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941023190.647, "dur": 0.781, + "args": { + "External id": 990090,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941023196.145, "dur": 12.313, + "args": { + "External id": 990091,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941023216.869, "dur": 4.601, + "args": { + "External id": 990092,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941023225.809, "dur": 13.544, + "args": { + "External id": 990093,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 8076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941023355.722, "dur": 3369.709, + "args": { + "External id": 990094,"Record function id": 0, "Ev Idx": 8077 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.21)", "pid": 2338710, "tid": 2379450, + "ts": 6345941023380.204, "dur": 1250.698, + "args": { + "External id": 990095,"Record function id": 0, "Ev Idx": 8078 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.21)", "pid": 2338710, "tid": 2379450, + "ts": 6345941023397.308, "dur": 382.604, + "args": { + "External id": 990096,"Record function id": 0, "Ev Idx": 8079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941023497.169, "dur": 6.058, + "args": { + "External id": 990097,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941023506.600, "dur": 1.111, + "args": { + "External id": 990098,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941023509.773, "dur": 1.341, + "args": { + "External id": 990099,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941023513.340, "dur": 1.227, + "args": { + "External id": 990100,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941023516.462, "dur": 1.289, + "args": { + "External id": 990101,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941023519.205, "dur": 1.087, + "args": { + "External id": 990102,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941023524.592, "dur": 1.002, + "args": { + "External id": 990103,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941023527.347, "dur": 3.914, + "args": { + "External id": 990104,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941023532.892, "dur": 1.186, + "args": { + "External id": 990105,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941023536.011, "dur": 1.174, + "args": { + "External id": 990106,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941023560.137, "dur": 184.457, + "args": { + "External id": 990107,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941023582.024, "dur": 156.389, + "args": { + "External id": 990108,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941023603.175, "dur": 17.378, + "args": { + "External id": 990109,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345941023626.116, "dur": 79.565, + "args": { + "External id": 990110,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 8093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941023629.419, "dur": 75.777, + "args": { + "External id": 990111,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 8094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941023634.580, "dur": 6.326, + "args": { + "External id": 990112,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941023643.085, "dur": 61.549, + "args": { + "External id": 990113,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 8096 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.20", "pid": 2338710, "tid": 2379450, + "ts": 6345941023867.211, "dur": 755.171, + "args": { + "External id": 990114,"Record function id": 0, "Ev Idx": 8097 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.20)", "pid": 2338710, "tid": 2379450, + "ts": 6345941023884.146, "dur": 724.101, + "args": { + "External id": 990115,"Record function id": 0, "Ev Idx": 8098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941023947.665, "dur": 7.062, + "args": { + "External id": 990116,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345941023975.269, "dur": 53.460, + "args": { + "External id": 990117,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941023982.102, "dur": 1.699, + "args": { + "External id": 990118,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941023986.896, "dur": 1.697, + "args": { + "External id": 990119,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941023989.921, "dur": 3.180, + "args": { + "External id": 990120,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941023994.364, "dur": 0.718, + "args": { + "External id": 990121,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941023997.408, "dur": 0.405, + "args": { + "External id": 990122,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941023999.342, "dur": 0.425, + "args": { + "External id": 990123,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941024001.027, "dur": 0.534, + "args": { + "External id": 990124,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941024003.689, "dur": 0.384, + "args": { + "External id": 990125,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941024005.180, "dur": 0.309, + "args": { + "External id": 990126,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941024045.155, "dur": 93.918, + "args": { + "External id": 990127,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345941024181.314, "dur": 141.148, + "args": { + "External id": 990128,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 8111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941024195.610, "dur": 5.776, + "args": { + "External id": 990129,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345941024207.724, "dur": 15.569, + "args": { + "External id": 990130,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345941024212.713, "dur": 9.942, + "args": { + "External id": 990131,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 8114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941024217.195, "dur": 3.487, + "args": { + "External id": 990132,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345941024232.011, "dur": 29.474, + "args": { + "External id": 990133,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941024235.152, "dur": 0.720, + "args": { + "External id": 990134,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941024238.371, "dur": 0.529, + "args": { + "External id": 990135,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941024240.108, "dur": 0.538, + "args": { + "External id": 990136,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941024242.466, "dur": 1.899, + "args": { + "External id": 990137,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941024245.652, "dur": 0.565, + "args": { + "External id": 990138,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941024247.722, "dur": 0.428, + "args": { + "External id": 990139,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941024250.980, "dur": 0.302, + "args": { + "External id": 990140,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941024252.427, "dur": 3.398, + "args": { + "External id": 990141,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941024257.620, "dur": 0.381, + "args": { + "External id": 990142,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941024277.547, "dur": 36.070, + "args": { + "External id": 990143,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345941024380.585, "dur": 142.329, + "args": { + "External id": 990144,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 8127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941024415.741, "dur": 103.030, + "args": { + "External id": 990145,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8128, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345941024427.294, "dur": 86.455, + "args": { + "External id": 990146,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 8129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941024543.626, "dur": 2.401, + "args": { + "External id": 990147,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8130, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941024639.095, "dur": 2060.942, + "args": { + "External id": 990148,"Sequence number": 10552515, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8131 + } + }, + { + "ph": "f", "id": 404, "pid": 2338710, "tid": 2379450, "ts": 6345941024639.095, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941024767.555, "dur": 127.578, + "args": { + "External id": 990149,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 8132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345941024949.626, "dur": 45.737, + "args": { + "External id": 990150,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 8133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345941025043.477, "dur": 115.625, + "args": { + "External id": 990151,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 8134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941025176.666, "dur": 38.211, + "args": { + "External id": 990152,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941025221.977, "dur": 35.531, + "args": { + "External id": 990153,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941025264.804, "dur": 30.802, + "args": { + "External id": 990154,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941025305.729, "dur": 34.389, + "args": { + "External id": 990155,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345941025373.388, "dur": 28.719, + "args": { + "External id": 990156,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 8139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345941025425.105, "dur": 34.396, + "args": { + "External id": 990157,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345941025485.218, "dur": 21.847, + "args": { + "External id": 990158,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345941025524.271, "dur": 15.858, + "args": { + "External id": 990159,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941025551.139, "dur": 40.591, + "args": { + "External id": 990160,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941025595.439, "dur": 38.691, + "args": { + "External id": 990161,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345941025666.152, "dur": 314.884, + "args": { + "External id": 990162,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 8145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941025758.866, "dur": 8.081, + "args": { + "External id": 990163,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941025769.211, "dur": 3.130, + "args": { + "External id": 990164,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941025773.443, "dur": 4.949, + "args": { + "External id": 990165,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941025779.606, "dur": 4.360, + "args": { + "External id": 990166,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345941025853.698, "dur": 6.343, + "args": { + "External id": 990167,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941025855.713, "dur": 3.673, + "args": { + "External id": 990168,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345941025862.076, "dur": 38.903, + "args": { + "External id": 990169,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941025868.439, "dur": 1.897, + "args": { + "External id": 990170,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345941025902.966, "dur": 1.586, + "args": { + "External id": 990171,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941025903.731, "dur": 0.745, + "args": { + "External id": 990172,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345941025905.434, "dur": 16.037, + "args": { + "External id": 990173,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941025907.613, "dur": 0.903, + "args": { + "External id": 990174,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345941026049.739, "dur": 72.465, + "args": { + "External id": 990175,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345941026149.625, "dur": 20.616, + "args": { + "External id": 990176,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941026180.354, "dur": 58.846, + "args": { + "External id": 990177,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941026246.647, "dur": 45.504, + "args": { + "External id": 990178,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941026300.789, "dur": 23.100, + "args": { + "External id": 990179,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941026332.396, "dur": 34.011, + "args": { + "External id": 990180,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941026374.059, "dur": 31.962, + "args": { + "External id": 990181,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941026412.665, "dur": 34.003, + "args": { + "External id": 990182,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345941026473.423, "dur": 26.695, + "args": { + "External id": 990183,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 8166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345941026519.455, "dur": 29.416, + "args": { + "External id": 990184,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345941026568.226, "dur": 20.834, + "args": { + "External id": 990185,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345941026608.753, "dur": 16.165, + "args": { + "External id": 990186,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345941026641.846, "dur": 18.839, + "args": { + "External id": 990187,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 8170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941026750.770, "dur": 17.188, + "args": { + "External id": 990188,"Record function id": 0, "Ev Idx": 8171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941026754.376, "dur": 12.413, + "args": { + "External id": 990189,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941026759.229, "dur": 6.377, + "args": { + "External id": 990190,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941026761.166, "dur": 4.310, + "args": { + "External id": 990191,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941026772.403, "dur": 5.652, + "args": { + "External id": 990192,"Record function id": 0, "Ev Idx": 8175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941026774.194, "dur": 3.241, + "args": { + "External id": 990193,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941026775.149, "dur": 1.713, + "args": { + "External id": 990194,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941026775.909, "dur": 0.853, + "args": { + "External id": 990195,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941026782.241, "dur": 4.453, + "args": { + "External id": 990196,"Record function id": 0, "Ev Idx": 8179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941026783.391, "dur": 2.776, + "args": { + "External id": 990197,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941026784.077, "dur": 1.599, + "args": { + "External id": 990198,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941026784.847, "dur": 0.711, + "args": { + "External id": 990199,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941026790.465, "dur": 4.790, + "args": { + "External id": 990200,"Record function id": 0, "Ev Idx": 8183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941026791.936, "dur": 2.815, + "args": { + "External id": 990201,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941026792.568, "dur": 1.607, + "args": { + "External id": 990202,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941026793.235, "dur": 0.817, + "args": { + "External id": 990203,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941026798.878, "dur": 4.278, + "args": { + "External id": 990204,"Record function id": 0, "Ev Idx": 8187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941026800.074, "dur": 2.603, + "args": { + "External id": 990205,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941026800.615, "dur": 1.440, + "args": { + "External id": 990206,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941026801.139, "dur": 0.807, + "args": { + "External id": 990207,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941026806.754, "dur": 41.671, + "args": { + "External id": 990208,"Record function id": 0, "Ev Idx": 8191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941026842.812, "dur": 5.098, + "args": { + "External id": 990209,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941026843.856, "dur": 3.475, + "args": { + "External id": 990210,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941026846.324, "dur": 0.823, + "args": { + "External id": 990211,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941026852.329, "dur": 7.070, + "args": { + "External id": 990212,"Record function id": 0, "Ev Idx": 8195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941026853.968, "dur": 4.928, + "args": { + "External id": 990213,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941026854.762, "dur": 3.586, + "args": { + "External id": 990214,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941026855.099, "dur": 3.130, + "args": { + "External id": 990215,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941026863.020, "dur": 4.427, + "args": { + "External id": 990216,"Record function id": 0, "Ev Idx": 8199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941026864.248, "dur": 2.714, + "args": { + "External id": 990217,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941026865.021, "dur": 1.461, + "args": { + "External id": 990218,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941026865.588, "dur": 0.796, + "args": { + "External id": 990219,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941026871.165, "dur": 4.227, + "args": { + "External id": 990220,"Record function id": 0, "Ev Idx": 8203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941026872.332, "dur": 2.601, + "args": { + "External id": 990221,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941026872.913, "dur": 1.554, + "args": { + "External id": 990222,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941026873.454, "dur": 0.890, + "args": { + "External id": 990223,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941026879.800, "dur": 63636.605, + "args": { + "External id": 990224,"Record function id": 0, "Sequence number": 10552514, "Fwd thread id": 1, "Ev Idx": 8207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941026881.186, "dur": 63624.984, + "args": { + "External id": 990225,"Sequence number": 10552514, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8208 + } + }, + { + "ph": "f", "id": 405, "pid": 2338710, "tid": 2379450, "ts": 6345941026881.186, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.21)", "pid": 2338710, "tid": 2379450, + "ts": 6345941026915.228, "dur": 45.256, + "args": { + "External id": 990226,"Record function id": 0, "Ev Idx": 8209 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.21)", "pid": 2338710, "tid": 2379450, + "ts": 6345941026969.366, "dur": 137.468, + "args": { + "External id": 990227,"Record function id": 0, "Ev Idx": 8210 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.21)", "pid": 2338710, "tid": 2379450, + "ts": 6345941027117.492, "dur": 63377.881, + "args": { + "External id": 990228,"Record function id": 0, "Ev Idx": 8211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941027228.384, "dur": 9.173, + "args": { + "External id": 990229,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941027250.333, "dur": 6.026, + "args": { + "External id": 990230,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345941027275.372, "dur": 62077.329, + "args": { + "External id": 990231,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345941027293.593, "dur": 62042.188, + "args": { + "External id": 990232,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941027400.768, "dur": 22.449, + "args": { + "External id": 990233,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345941027447.733, "dur": 61828.824, + "args": { + "External id": 990234,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 8217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941027451.909, "dur": 61823.321, + "args": { + "External id": 990235,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 8218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941027457.021, "dur": 10.490, + "args": { + "External id": 990236,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941027470.171, "dur": 61798.208, + "args": { + "External id": 990237,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 8220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941089484.995, "dur": 15.595, + "args": { + "External id": 990238,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 8221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941089489.820, "dur": 10.265, + "args": { + "External id": 990239,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345941089540.036, "dur": 434.659, + "args": { + "External id": 990240,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 8223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941089581.031, "dur": 388.038, + "args": { + "External id": 990241,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8224, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345941089599.900, "dur": 361.953, + "args": { + "External id": 990242,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 8225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941090004.173, "dur": 2.635, + "args": { + "External id": 990243,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8226, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941090143.642, "dur": 9.236, + "args": { + "External id": 990244,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941090171.062, "dur": 49.500, + "args": { + "External id": 990245,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941090232.202, "dur": 4.067, + "args": { + "External id": 990246,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941090242.637, "dur": 16.013, + "args": { + "External id": 990247,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941090265.618, "dur": 1.131, + "args": { + "External id": 990248,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941090273.888, "dur": 12.695, + "args": { + "External id": 990249,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941090291.919, "dur": 1.120, + "args": { + "External id": 990250,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941090298.267, "dur": 11.224, + "args": { + "External id": 990251,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941090314.469, "dur": 0.933, + "args": { + "External id": 990252,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941090319.855, "dur": 12.022, + "args": { + "External id": 990253,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941090336.807, "dur": 1.550, + "args": { + "External id": 990254,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941090342.428, "dur": 10.989, + "args": { + "External id": 990255,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941090360.401, "dur": 1.014, + "args": { + "External id": 990256,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941090365.823, "dur": 12.774, + "args": { + "External id": 990257,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941090383.099, "dur": 0.968, + "args": { + "External id": 990258,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941090388.249, "dur": 12.948, + "args": { + "External id": 990259,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941090405.467, "dur": 0.927, + "args": { + "External id": 990260,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941090412.871, "dur": 13.111, + "args": { + "External id": 990261,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 8244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941090534.101, "dur": 3335.098, + "args": { + "External id": 990262,"Record function id": 0, "Ev Idx": 8245 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.20)", "pid": 2338710, "tid": 2379450, + "ts": 6345941090556.839, "dur": 1212.599, + "args": { + "External id": 990263,"Record function id": 0, "Ev Idx": 8246 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.20)", "pid": 2338710, "tid": 2379450, + "ts": 6345941090575.637, "dur": 359.105, + "args": { + "External id": 990264,"Record function id": 0, "Ev Idx": 8247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941090667.088, "dur": 6.804, + "args": { + "External id": 990265,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941090677.577, "dur": 0.824, + "args": { + "External id": 990266,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941090680.515, "dur": 1.062, + "args": { + "External id": 990267,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941090684.126, "dur": 0.963, + "args": { + "External id": 990268,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941090686.816, "dur": 0.938, + "args": { + "External id": 990269,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941090689.760, "dur": 1.057, + "args": { + "External id": 990270,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941090692.952, "dur": 0.803, + "args": { + "External id": 990271,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941090697.876, "dur": 1.248, + "args": { + "External id": 990272,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941090700.746, "dur": 3.031, + "args": { + "External id": 990273,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941090705.832, "dur": 0.967, + "args": { + "External id": 990274,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941090727.311, "dur": 174.484, + "args": { + "External id": 990275,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941090747.174, "dur": 149.392, + "args": { + "External id": 990276,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941090769.217, "dur": 17.916, + "args": { + "External id": 990277,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345941090792.357, "dur": 74.509, + "args": { + "External id": 990278,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 8261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941090795.531, "dur": 70.942, + "args": { + "External id": 990279,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 8262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941090799.752, "dur": 6.290, + "args": { + "External id": 990280,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941090808.197, "dur": 57.479, + "args": { + "External id": 990281,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 8264 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.19", "pid": 2338710, "tid": 2379450, + "ts": 6345941091047.695, "dur": 712.912, + "args": { + "External id": 990282,"Record function id": 0, "Ev Idx": 8265 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.19)", "pid": 2338710, "tid": 2379450, + "ts": 6345941091110.558, "dur": 636.229, + "args": { + "External id": 990283,"Record function id": 0, "Ev Idx": 8266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941091179.613, "dur": 8.751, + "args": { + "External id": 990284,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345941091206.899, "dur": 35.808, + "args": { + "External id": 990285,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941091212.871, "dur": 1.818, + "args": { + "External id": 990286,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941091216.940, "dur": 1.996, + "args": { + "External id": 990287,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941091220.723, "dur": 0.622, + "args": { + "External id": 990288,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941091222.579, "dur": 2.353, + "args": { + "External id": 990289,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941091228.845, "dur": 0.427, + "args": { + "External id": 990290,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941091230.951, "dur": 0.293, + "args": { + "External id": 990291,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941091232.812, "dur": 0.355, + "args": { + "External id": 990292,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941091236.240, "dur": 0.352, + "args": { + "External id": 990293,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941091238.202, "dur": 0.309, + "args": { + "External id": 990294,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941091254.911, "dur": 52.268, + "args": { + "External id": 990295,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345941091343.507, "dur": 129.560, + "args": { + "External id": 990296,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 8279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941091355.200, "dur": 4.246, + "args": { + "External id": 990297,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345941091365.498, "dur": 12.108, + "args": { + "External id": 990298,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345941091370.618, "dur": 6.495, + "args": { + "External id": 990299,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 8282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941091374.764, "dur": 0.663, + "args": { + "External id": 990300,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345941091389.971, "dur": 29.203, + "args": { + "External id": 990301,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941091391.680, "dur": 2.697, + "args": { + "External id": 990302,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941091397.321, "dur": 0.564, + "args": { + "External id": 990303,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941091399.631, "dur": 0.478, + "args": { + "External id": 990304,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941091401.367, "dur": 1.576, + "args": { + "External id": 990305,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941091404.583, "dur": 0.337, + "args": { + "External id": 990306,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941091406.138, "dur": 0.281, + "args": { + "External id": 990307,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941091409.627, "dur": 0.416, + "args": { + "External id": 990308,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941091411.269, "dur": 0.289, + "args": { + "External id": 990309,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941091412.824, "dur": 2.915, + "args": { + "External id": 990310,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941091431.573, "dur": 33.040, + "args": { + "External id": 990311,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345941091524.882, "dur": 142.171, + "args": { + "External id": 990312,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 8295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941091559.554, "dur": 103.613, + "args": { + "External id": 990313,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8296, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345941091570.656, "dur": 87.231, + "args": { + "External id": 990314,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 8297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941091686.466, "dur": 1.837, + "args": { + "External id": 990315,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8298, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941091778.017, "dur": 2064.068, + "args": { + "External id": 990316,"Sequence number": 10552513, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8299 + } + }, + { + "ph": "f", "id": 406, "pid": 2338710, "tid": 2379450, "ts": 6345941091778.017, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941091900.133, "dur": 193.613, + "args": { + "External id": 990317,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 8300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345941092152.750, "dur": 48.719, + "args": { + "External id": 990318,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 8301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345941092224.658, "dur": 69.980, + "args": { + "External id": 990319,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 8302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941092306.148, "dur": 36.876, + "args": { + "External id": 990320,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941092350.958, "dur": 37.557, + "args": { + "External id": 990321,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941092395.639, "dur": 31.526, + "args": { + "External id": 990322,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941092437.592, "dur": 32.434, + "args": { + "External id": 990323,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345941092498.374, "dur": 27.645, + "args": { + "External id": 990324,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 8307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345941092547.894, "dur": 32.669, + "args": { + "External id": 990325,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345941092606.659, "dur": 21.678, + "args": { + "External id": 990326,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345941092646.098, "dur": 16.781, + "args": { + "External id": 990327,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941092674.007, "dur": 41.361, + "args": { + "External id": 990328,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941092719.067, "dur": 35.934, + "args": { + "External id": 990329,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345941092789.062, "dur": 394.180, + "args": { + "External id": 990330,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 8313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941092884.905, "dur": 7.422, + "args": { + "External id": 990331,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941092894.610, "dur": 3.722, + "args": { + "External id": 990332,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941092899.608, "dur": 2.723, + "args": { + "External id": 990333,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941092903.680, "dur": 4.900, + "args": { + "External id": 990334,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345941092975.446, "dur": 10.157, + "args": { + "External id": 990335,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941092980.976, "dur": 4.028, + "args": { + "External id": 990336,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345941092988.113, "dur": 57.401, + "args": { + "External id": 990337,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941092996.284, "dur": 2.056, + "args": { + "External id": 990338,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345941093048.894, "dur": 2.124, + "args": { + "External id": 990339,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941093049.953, "dur": 0.902, + "args": { + "External id": 990340,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345941093088.924, "dur": 25.410, + "args": { + "External id": 990341,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941093092.645, "dur": 1.213, + "args": { + "External id": 990342,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345941093232.015, "dur": 34.091, + "args": { + "External id": 990343,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345941093293.761, "dur": 20.312, + "args": { + "External id": 990344,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941093323.151, "dur": 58.702, + "args": { + "External id": 990345,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941093390.138, "dur": 44.850, + "args": { + "External id": 990346,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941093443.993, "dur": 24.482, + "args": { + "External id": 990347,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941093478.096, "dur": 34.677, + "args": { + "External id": 990348,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941093521.015, "dur": 31.224, + "args": { + "External id": 990349,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941093560.903, "dur": 36.411, + "args": { + "External id": 990350,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345941093621.368, "dur": 25.672, + "args": { + "External id": 990351,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 8334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345941093670.945, "dur": 25.808, + "args": { + "External id": 990352,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345941093716.095, "dur": 19.782, + "args": { + "External id": 990353,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345941093753.688, "dur": 15.431, + "args": { + "External id": 990354,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345941093787.433, "dur": 18.182, + "args": { + "External id": 990355,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 8338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941093894.767, "dur": 17.287, + "args": { + "External id": 990356,"Record function id": 0, "Ev Idx": 8339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941093898.864, "dur": 12.006, + "args": { + "External id": 990357,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941093903.898, "dur": 5.920, + "args": { + "External id": 990358,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941093905.569, "dur": 4.138, + "args": { + "External id": 990359,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941093917.036, "dur": 5.726, + "args": { + "External id": 990360,"Record function id": 0, "Ev Idx": 8343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941093919.038, "dur": 3.269, + "args": { + "External id": 990361,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941093919.718, "dur": 2.025, + "args": { + "External id": 990362,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941093920.401, "dur": 1.223, + "args": { + "External id": 990363,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941093926.974, "dur": 5.502, + "args": { + "External id": 990364,"Record function id": 0, "Ev Idx": 8347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941093928.726, "dur": 3.240, + "args": { + "External id": 990365,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941093929.564, "dur": 1.854, + "args": { + "External id": 990366,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941093930.568, "dur": 0.732, + "args": { + "External id": 990367,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941093936.254, "dur": 3.867, + "args": { + "External id": 990368,"Record function id": 0, "Ev Idx": 8351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941093937.400, "dur": 2.245, + "args": { + "External id": 990369,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941093937.960, "dur": 1.200, + "args": { + "External id": 990370,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941093938.325, "dur": 0.758, + "args": { + "External id": 990371,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941093943.731, "dur": 4.379, + "args": { + "External id": 990372,"Record function id": 0, "Ev Idx": 8355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941093945.059, "dur": 2.553, + "args": { + "External id": 990373,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941093945.714, "dur": 1.189, + "args": { + "External id": 990374,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941093946.049, "dur": 0.779, + "args": { + "External id": 990375,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941093951.731, "dur": 6.423, + "args": { + "External id": 990376,"Record function id": 0, "Ev Idx": 8359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941093952.972, "dur": 4.700, + "args": { + "External id": 990377,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941093953.586, "dur": 3.617, + "args": { + "External id": 990378,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941093954.028, "dur": 3.102, + "args": { + "External id": 990379,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941093961.854, "dur": 4.733, + "args": { + "External id": 990380,"Record function id": 0, "Ev Idx": 8363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941093963.660, "dur": 2.420, + "args": { + "External id": 990381,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941093964.271, "dur": 1.329, + "args": { + "External id": 990382,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941093964.666, "dur": 0.816, + "args": { + "External id": 990383,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941093970.201, "dur": 6.693, + "args": { + "External id": 990384,"Record function id": 0, "Ev Idx": 8367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941093971.540, "dur": 4.867, + "args": { + "External id": 990385,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941093972.127, "dur": 3.829, + "args": { + "External id": 990386,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941093975.038, "dur": 0.837, + "args": { + "External id": 990387,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941093980.548, "dur": 4.258, + "args": { + "External id": 990388,"Record function id": 0, "Ev Idx": 8371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941093981.945, "dur": 2.374, + "args": { + "External id": 990389,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941093982.534, "dur": 1.329, + "args": { + "External id": 990390,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941093982.868, "dur": 0.919, + "args": { + "External id": 990391,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941093989.460, "dur": 63235.765, + "args": { + "External id": 990392,"Record function id": 0, "Sequence number": 10552512, "Fwd thread id": 1, "Ev Idx": 8375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941093990.882, "dur": 63223.497, + "args": { + "External id": 990393,"Sequence number": 10552512, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8376 + } + }, + { + "ph": "f", "id": 407, "pid": 2338710, "tid": 2379450, "ts": 6345941093990.882, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.20)", "pid": 2338710, "tid": 2379450, + "ts": 6345941094048.128, "dur": 88.635, + "args": { + "External id": 990394,"Record function id": 0, "Ev Idx": 8377 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.20)", "pid": 2338710, "tid": 2379450, + "ts": 6345941094147.848, "dur": 78.228, + "args": { + "External id": 990395,"Record function id": 0, "Ev Idx": 8378 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.20)", "pid": 2338710, "tid": 2379450, + "ts": 6345941094232.954, "dur": 62971.307, + "args": { + "External id": 990396,"Record function id": 0, "Ev Idx": 8379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941094346.238, "dur": 9.101, + "args": { + "External id": 990397,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941094368.182, "dur": 6.307, + "args": { + "External id": 990398,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345941094394.935, "dur": 61593.816, + "args": { + "External id": 990399,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345941094413.472, "dur": 61558.352, + "args": { + "External id": 990400,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941094520.402, "dur": 22.799, + "args": { + "External id": 990401,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345941094568.475, "dur": 61348.244, + "args": { + "External id": 990402,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 8385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941094572.714, "dur": 61342.698, + "args": { + "External id": 990403,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 8386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941094577.995, "dur": 10.385, + "args": { + "External id": 990404,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941094591.002, "dur": 61317.288, + "args": { + "External id": 990405,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 8388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941156180.105, "dur": 16.756, + "args": { + "External id": 990406,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 8389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941156185.437, "dur": 10.520, + "args": { + "External id": 990407,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345941156239.199, "dur": 433.200, + "args": { + "External id": 990408,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 8391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941156281.311, "dur": 384.140, + "args": { + "External id": 990409,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8392, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345941156299.891, "dur": 358.428, + "args": { + "External id": 990410,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 8393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941156699.931, "dur": 2.501, + "args": { + "External id": 990411,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8394, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941156770.085, "dur": 8.589, + "args": { + "External id": 990412,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941156796.552, "dur": 43.492, + "args": { + "External id": 990413,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941156851.579, "dur": 4.953, + "args": { + "External id": 990414,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941156862.359, "dur": 16.431, + "args": { + "External id": 990415,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941156884.693, "dur": 1.263, + "args": { + "External id": 990416,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941156891.686, "dur": 15.013, + "args": { + "External id": 990417,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941156914.652, "dur": 1.140, + "args": { + "External id": 990418,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941156921.097, "dur": 14.160, + "args": { + "External id": 990419,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941156940.319, "dur": 1.056, + "args": { + "External id": 990420,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941156946.334, "dur": 14.692, + "args": { + "External id": 990421,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941156965.738, "dur": 1.481, + "args": { + "External id": 990422,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941156972.093, "dur": 13.249, + "args": { + "External id": 990423,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941156989.924, "dur": 0.878, + "args": { + "External id": 990424,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941156997.651, "dur": 34.669, + "args": { + "External id": 990425,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941157041.565, "dur": 2.799, + "args": { + "External id": 990426,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941157049.143, "dur": 50.244, + "args": { + "External id": 990427,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941157108.360, "dur": 1.814, + "args": { + "External id": 990428,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941157115.579, "dur": 16.996, + "args": { + "External id": 990429,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 8412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941157246.002, "dur": 3432.218, + "args": { + "External id": 990430,"Record function id": 0, "Ev Idx": 8413 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.19)", "pid": 2338710, "tid": 2379450, + "ts": 6345941157270.846, "dur": 1249.322, + "args": { + "External id": 990431,"Record function id": 0, "Ev Idx": 8414 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.19)", "pid": 2338710, "tid": 2379450, + "ts": 6345941157290.685, "dur": 387.668, + "args": { + "External id": 990432,"Record function id": 0, "Ev Idx": 8415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941157390.447, "dur": 8.163, + "args": { + "External id": 990433,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941157403.884, "dur": 1.031, + "args": { + "External id": 990434,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941157407.345, "dur": 0.672, + "args": { + "External id": 990435,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941157410.376, "dur": 0.905, + "args": { + "External id": 990436,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941157415.554, "dur": 1.018, + "args": { + "External id": 990437,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941157418.666, "dur": 0.956, + "args": { + "External id": 990438,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941157421.247, "dur": 0.987, + "args": { + "External id": 990439,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941157424.097, "dur": 1.286, + "args": { + "External id": 990440,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941157429.274, "dur": 3.022, + "args": { + "External id": 990441,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941157433.891, "dur": 1.015, + "args": { + "External id": 990442,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941157459.718, "dur": 181.299, + "args": { + "External id": 990443,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941157481.109, "dur": 154.890, + "args": { + "External id": 990444,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941157500.421, "dur": 21.343, + "args": { + "External id": 990445,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345941157527.255, "dur": 79.080, + "args": { + "External id": 990446,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 8429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941157532.722, "dur": 73.174, + "args": { + "External id": 990447,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 8430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941157536.684, "dur": 6.541, + "args": { + "External id": 990448,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941157545.348, "dur": 59.874, + "args": { + "External id": 990449,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 8432 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.18", "pid": 2338710, "tid": 2379450, + "ts": 6345941157771.344, "dur": 739.589, + "args": { + "External id": 990450,"Record function id": 0, "Ev Idx": 8433 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.18)", "pid": 2338710, "tid": 2379450, + "ts": 6345941157788.236, "dur": 708.461, + "args": { + "External id": 990451,"Record function id": 0, "Ev Idx": 8434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941157849.805, "dur": 6.822, + "args": { + "External id": 990452,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345941157875.262, "dur": 32.740, + "args": { + "External id": 990453,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941157880.804, "dur": 2.046, + "args": { + "External id": 990454,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941157884.950, "dur": 1.799, + "args": { + "External id": 990455,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941157888.379, "dur": 0.608, + "args": { + "External id": 990456,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941157890.430, "dur": 2.394, + "args": { + "External id": 990457,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941157895.497, "dur": 0.462, + "args": { + "External id": 990458,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941157897.199, "dur": 0.641, + "args": { + "External id": 990459,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941157899.040, "dur": 0.360, + "args": { + "External id": 990460,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941157901.989, "dur": 0.561, + "args": { + "External id": 990461,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941157903.660, "dur": 0.677, + "args": { + "External id": 990462,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941157919.238, "dur": 48.390, + "args": { + "External id": 990463,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345941158002.499, "dur": 207.348, + "args": { + "External id": 990464,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 8447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941158038.686, "dur": 6.503, + "args": { + "External id": 990465,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345941158091.557, "dur": 15.352, + "args": { + "External id": 990466,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345941158098.210, "dur": 8.240, + "args": { + "External id": 990467,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 8450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941158102.702, "dur": 1.205, + "args": { + "External id": 990468,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345941158116.361, "dur": 30.909, + "args": { + "External id": 990469,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941158119.112, "dur": 2.862, + "args": { + "External id": 990470,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941158126.160, "dur": 0.683, + "args": { + "External id": 990471,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941158128.036, "dur": 0.586, + "args": { + "External id": 990472,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941158129.805, "dur": 2.125, + "args": { + "External id": 990473,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941158133.195, "dur": 0.651, + "args": { + "External id": 990474,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941158135.000, "dur": 0.565, + "args": { + "External id": 990475,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941158137.965, "dur": 0.279, + "args": { + "External id": 990476,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941158139.240, "dur": 0.565, + "args": { + "External id": 990477,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941158141.006, "dur": 2.533, + "args": { + "External id": 990478,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941158161.586, "dur": 39.004, + "args": { + "External id": 990479,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345941158266.302, "dur": 144.182, + "args": { + "External id": 990480,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 8463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941158301.899, "dur": 104.490, + "args": { + "External id": 990481,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8464, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345941158312.729, "dur": 88.648, + "args": { + "External id": 990482,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 8465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941158432.834, "dur": 1.953, + "args": { + "External id": 990483,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8466, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941158528.648, "dur": 2122.848, + "args": { + "External id": 990484,"Sequence number": 10552511, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8467 + } + }, + { + "ph": "f", "id": 408, "pid": 2338710, "tid": 2379450, "ts": 6345941158528.648, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941158653.603, "dur": 125.400, + "args": { + "External id": 990485,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 8468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345941158827.663, "dur": 46.436, + "args": { + "External id": 990486,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 8469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345941158896.970, "dur": 58.023, + "args": { + "External id": 990487,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 8470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941158966.407, "dur": 35.126, + "args": { + "External id": 990488,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941159033.579, "dur": 88.813, + "args": { + "External id": 990489,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941159139.306, "dur": 35.902, + "args": { + "External id": 990490,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941159186.312, "dur": 37.568, + "args": { + "External id": 990491,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345941159256.553, "dur": 29.748, + "args": { + "External id": 990492,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 8475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345941159306.875, "dur": 37.684, + "args": { + "External id": 990493,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345941159371.462, "dur": 26.818, + "args": { + "External id": 990494,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345941159416.180, "dur": 17.666, + "args": { + "External id": 990495,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941159444.819, "dur": 43.770, + "args": { + "External id": 990496,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941159492.844, "dur": 37.954, + "args": { + "External id": 990497,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345941159562.354, "dur": 341.879, + "args": { + "External id": 990498,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 8481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941159676.706, "dur": 10.145, + "args": { + "External id": 990499,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941159689.313, "dur": 3.972, + "args": { + "External id": 990500,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941159694.481, "dur": 2.823, + "args": { + "External id": 990501,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941159698.385, "dur": 5.803, + "args": { + "External id": 990502,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345941159771.325, "dur": 6.588, + "args": { + "External id": 990503,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941159773.720, "dur": 3.675, + "args": { + "External id": 990504,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345941159780.180, "dur": 40.365, + "args": { + "External id": 990505,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941159788.684, "dur": 2.071, + "args": { + "External id": 990506,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345941159824.378, "dur": 2.284, + "args": { + "External id": 990507,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941159825.752, "dur": 0.827, + "args": { + "External id": 990508,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345941159827.721, "dur": 21.855, + "args": { + "External id": 990509,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941159830.115, "dur": 0.658, + "args": { + "External id": 990510,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345941159952.824, "dur": 33.731, + "args": { + "External id": 990511,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345941160026.486, "dur": 24.946, + "args": { + "External id": 990512,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941160103.749, "dur": 66.324, + "args": { + "External id": 990513,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941160178.070, "dur": 49.052, + "args": { + "External id": 990514,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941160236.983, "dur": 25.237, + "args": { + "External id": 990515,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941160271.126, "dur": 37.233, + "args": { + "External id": 990516,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941160315.919, "dur": 32.406, + "args": { + "External id": 990517,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941160354.860, "dur": 34.737, + "args": { + "External id": 990518,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345941160418.303, "dur": 29.201, + "args": { + "External id": 990519,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 8502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345941160468.734, "dur": 27.835, + "args": { + "External id": 990520,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345941160517.361, "dur": 23.510, + "args": { + "External id": 990521,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345941160556.226, "dur": 16.013, + "args": { + "External id": 990522,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345941160595.755, "dur": 20.496, + "args": { + "External id": 990523,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 8506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941160704.371, "dur": 17.380, + "args": { + "External id": 990524,"Record function id": 0, "Ev Idx": 8507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941160708.269, "dur": 12.286, + "args": { + "External id": 990525,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941160713.186, "dur": 6.244, + "args": { + "External id": 990526,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941160714.969, "dur": 4.356, + "args": { + "External id": 990527,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941160726.407, "dur": 6.192, + "args": { + "External id": 990528,"Record function id": 0, "Ev Idx": 8511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941160728.504, "dur": 3.585, + "args": { + "External id": 990529,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941160729.564, "dur": 1.974, + "args": { + "External id": 990530,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941160730.284, "dur": 1.103, + "args": { + "External id": 990531,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941160736.506, "dur": 6.025, + "args": { + "External id": 990532,"Record function id": 0, "Ev Idx": 8515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941160738.097, "dur": 3.861, + "args": { + "External id": 990533,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941160739.366, "dur": 2.062, + "args": { + "External id": 990534,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941160740.442, "dur": 0.868, + "args": { + "External id": 990535,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941160746.316, "dur": 5.106, + "args": { + "External id": 990536,"Record function id": 0, "Ev Idx": 8519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941160748.022, "dur": 2.918, + "args": { + "External id": 990537,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941160748.651, "dur": 1.819, + "args": { + "External id": 990538,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941160749.375, "dur": 0.978, + "args": { + "External id": 990539,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941160755.038, "dur": 4.712, + "args": { + "External id": 990540,"Record function id": 0, "Ev Idx": 8523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941160756.488, "dur": 2.779, + "args": { + "External id": 990541,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941160757.189, "dur": 1.559, + "args": { + "External id": 990542,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941160757.681, "dur": 0.984, + "args": { + "External id": 990543,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941160763.399, "dur": 7.471, + "args": { + "External id": 990544,"Record function id": 0, "Ev Idx": 8527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941160765.190, "dur": 5.171, + "args": { + "External id": 990545,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941160765.774, "dur": 4.071, + "args": { + "External id": 990546,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941160766.619, "dur": 3.153, + "args": { + "External id": 990547,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941160774.562, "dur": 4.589, + "args": { + "External id": 990548,"Record function id": 0, "Ev Idx": 8531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941160776.229, "dur": 2.436, + "args": { + "External id": 990549,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941160776.775, "dur": 1.392, + "args": { + "External id": 990550,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941160777.095, "dur": 0.998, + "args": { + "External id": 990551,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941160782.796, "dur": 8.270, + "args": { + "External id": 990552,"Record function id": 0, "Ev Idx": 8535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941160788.276, "dur": 2.305, + "args": { + "External id": 990553,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941160788.925, "dur": 1.178, + "args": { + "External id": 990554,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941160789.241, "dur": 0.772, + "args": { + "External id": 990555,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941160794.925, "dur": 6.335, + "args": { + "External id": 990556,"Record function id": 0, "Ev Idx": 8539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941160796.024, "dur": 4.724, + "args": { + "External id": 990557,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941160796.592, "dur": 3.679, + "args": { + "External id": 990558,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941160799.345, "dur": 0.846, + "args": { + "External id": 990559,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941160806.269, "dur": 62924.808, + "args": { + "External id": 990560,"Record function id": 0, "Sequence number": 10552510, "Fwd thread id": 1, "Ev Idx": 8543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941160815.350, "dur": 62905.147, + "args": { + "External id": 990561,"Sequence number": 10552510, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8544 + } + }, + { + "ph": "f", "id": 409, "pid": 2338710, "tid": 2379450, "ts": 6345941160815.350, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.19)", "pid": 2338710, "tid": 2379450, + "ts": 6345941160853.970, "dur": 45.799, + "args": { + "External id": 990562,"Record function id": 0, "Ev Idx": 8545 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.19)", "pid": 2338710, "tid": 2379450, + "ts": 6345941160909.068, "dur": 74.253, + "args": { + "External id": 990563,"Record function id": 0, "Ev Idx": 8546 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.19)", "pid": 2338710, "tid": 2379450, + "ts": 6345941160990.198, "dur": 62720.663, + "args": { + "External id": 990564,"Record function id": 0, "Ev Idx": 8547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941161164.287, "dur": 8.868, + "args": { + "External id": 990565,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941161186.698, "dur": 6.024, + "args": { + "External id": 990566,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345941161209.950, "dur": 61261.656, + "args": { + "External id": 990567,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345941161228.646, "dur": 61226.714, + "args": { + "External id": 990568,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941161335.453, "dur": 22.521, + "args": { + "External id": 990569,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345941161384.495, "dur": 61009.211, + "args": { + "External id": 990570,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 8553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941161391.237, "dur": 61001.149, + "args": { + "External id": 990571,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 8554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941161396.610, "dur": 11.085, + "args": { + "External id": 990572,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941161410.188, "dur": 60975.595, + "args": { + "External id": 990573,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 8556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941222616.583, "dur": 16.741, + "args": { + "External id": 990574,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 8557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941222621.434, "dur": 11.282, + "args": { + "External id": 990575,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345941222673.356, "dur": 552.902, + "args": { + "External id": 990576,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 8559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941222715.703, "dur": 502.306, + "args": { + "External id": 990577,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8560, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345941222733.281, "dur": 476.257, + "args": { + "External id": 990578,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 8561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941223259.794, "dur": 2.830, + "args": { + "External id": 990579,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8562, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941223341.906, "dur": 9.293, + "args": { + "External id": 990580,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941223370.423, "dur": 41.448, + "args": { + "External id": 990581,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941223425.789, "dur": 5.069, + "args": { + "External id": 990582,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941223437.644, "dur": 17.614, + "args": { + "External id": 990583,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941223462.149, "dur": 1.185, + "args": { + "External id": 990584,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941223469.030, "dur": 15.596, + "args": { + "External id": 990585,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941223490.109, "dur": 1.080, + "args": { + "External id": 990586,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941223496.480, "dur": 14.581, + "args": { + "External id": 990587,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941223518.644, "dur": 1.100, + "args": { + "External id": 990588,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941223525.476, "dur": 15.864, + "args": { + "External id": 990589,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941223545.930, "dur": 1.312, + "args": { + "External id": 990590,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941223552.124, "dur": 13.894, + "args": { + "External id": 990591,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941223571.223, "dur": 0.949, + "args": { + "External id": 990592,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941223576.748, "dur": 14.882, + "args": { + "External id": 990593,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941223598.613, "dur": 0.914, + "args": { + "External id": 990594,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941223605.110, "dur": 13.494, + "args": { + "External id": 990595,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941223623.331, "dur": 0.790, + "args": { + "External id": 990596,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941223628.564, "dur": 14.334, + "args": { + "External id": 990597,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 8580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941223748.541, "dur": 3480.698, + "args": { + "External id": 990598,"Record function id": 0, "Ev Idx": 8581 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.18)", "pid": 2338710, "tid": 2379450, + "ts": 6345941223771.786, "dur": 1341.146, + "args": { + "External id": 990599,"Record function id": 0, "Ev Idx": 8582 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.18)", "pid": 2338710, "tid": 2379450, + "ts": 6345941223789.405, "dur": 466.250, + "args": { + "External id": 990600,"Record function id": 0, "Ev Idx": 8583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941223883.167, "dur": 6.520, + "args": { + "External id": 990601,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941223893.651, "dur": 1.114, + "args": { + "External id": 990602,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941223897.226, "dur": 1.117, + "args": { + "External id": 990603,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941223900.360, "dur": 1.033, + "args": { + "External id": 990604,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941223903.517, "dur": 0.973, + "args": { + "External id": 990605,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941223908.267, "dur": 0.967, + "args": { + "External id": 990606,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941223911.518, "dur": 4.275, + "args": { + "External id": 990607,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941223917.629, "dur": 1.444, + "args": { + "External id": 990608,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941223920.844, "dur": 3.321, + "args": { + "External id": 990609,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941223928.031, "dur": 1.260, + "args": { + "External id": 990610,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941223949.946, "dur": 264.117, + "args": { + "External id": 990611,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941223971.186, "dur": 235.852, + "args": { + "External id": 990612,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941223994.332, "dur": 37.835, + "args": { + "External id": 990613,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345941224038.774, "dur": 128.960, + "args": { + "External id": 990614,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 8597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941224042.198, "dur": 125.076, + "args": { + "External id": 990615,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 8598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941224047.164, "dur": 51.510, + "args": { + "External id": 990616,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941224103.213, "dur": 63.212, + "args": { + "External id": 990617,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 8600 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.17", "pid": 2338710, "tid": 2379450, + "ts": 6345941224351.404, "dur": 750.486, + "args": { + "External id": 990618,"Record function id": 0, "Ev Idx": 8601 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.17)", "pid": 2338710, "tid": 2379450, + "ts": 6345941224369.543, "dur": 671.449, + "args": { + "External id": 990619,"Record function id": 0, "Ev Idx": 8602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941224438.027, "dur": 7.631, + "args": { + "External id": 990620,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345941224464.454, "dur": 34.298, + "args": { + "External id": 990621,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941224469.962, "dur": 2.110, + "args": { + "External id": 990622,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941224474.451, "dur": 2.358, + "args": { + "External id": 990623,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941224478.939, "dur": 0.498, + "args": { + "External id": 990624,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941224480.796, "dur": 2.596, + "args": { + "External id": 990625,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941224485.461, "dur": 0.491, + "args": { + "External id": 990626,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941224487.431, "dur": 0.513, + "args": { + "External id": 990627,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941224489.206, "dur": 0.369, + "args": { + "External id": 990628,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941224492.559, "dur": 0.708, + "args": { + "External id": 990629,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941224494.578, "dur": 0.566, + "args": { + "External id": 990630,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941224512.166, "dur": 48.204, + "args": { + "External id": 990631,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345941224598.786, "dur": 128.509, + "args": { + "External id": 990632,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 8615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941224612.496, "dur": 3.973, + "args": { + "External id": 990633,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345941224622.367, "dur": 11.729, + "args": { + "External id": 990634,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345941224627.511, "dur": 6.053, + "args": { + "External id": 990635,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 8618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941224631.220, "dur": 0.710, + "args": { + "External id": 990636,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345941224641.380, "dur": 32.041, + "args": { + "External id": 990637,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941224643.802, "dur": 3.395, + "args": { + "External id": 990638,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941224650.047, "dur": 0.574, + "args": { + "External id": 990639,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941224652.231, "dur": 0.586, + "args": { + "External id": 990640,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941224654.335, "dur": 2.685, + "args": { + "External id": 990641,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941224658.671, "dur": 0.649, + "args": { + "External id": 990642,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941224660.931, "dur": 0.322, + "args": { + "External id": 990643,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941224664.415, "dur": 0.330, + "args": { + "External id": 990644,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941224665.646, "dur": 0.601, + "args": { + "External id": 990645,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941224667.609, "dur": 2.493, + "args": { + "External id": 990646,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941224685.280, "dur": 33.775, + "args": { + "External id": 990647,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345941224780.191, "dur": 148.449, + "args": { + "External id": 990648,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 8631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941224814.815, "dur": 109.596, + "args": { + "External id": 990649,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8632, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345941224825.793, "dur": 93.410, + "args": { + "External id": 990650,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 8633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941224954.769, "dur": 2.191, + "args": { + "External id": 990651,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8634, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941225122.501, "dur": 2082.035, + "args": { + "External id": 990652,"Sequence number": 10552509, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8635 + } + }, + { + "ph": "f", "id": 410, "pid": 2338710, "tid": 2379450, "ts": 6345941225122.501, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941225252.094, "dur": 132.968, + "args": { + "External id": 990653,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 8636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345941225435.875, "dur": 46.867, + "args": { + "External id": 990654,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 8637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345941225505.227, "dur": 61.524, + "args": { + "External id": 990655,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 8638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941225579.285, "dur": 39.978, + "args": { + "External id": 990656,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941225627.344, "dur": 38.444, + "args": { + "External id": 990657,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941225673.346, "dur": 34.297, + "args": { + "External id": 990658,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941225718.286, "dur": 32.922, + "args": { + "External id": 990659,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345941225784.116, "dur": 27.055, + "args": { + "External id": 990660,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 8643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345941225832.150, "dur": 33.399, + "args": { + "External id": 990661,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345941225891.893, "dur": 23.156, + "args": { + "External id": 990662,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345941225932.158, "dur": 17.993, + "args": { + "External id": 990663,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941225961.015, "dur": 43.716, + "args": { + "External id": 990664,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941226028.770, "dur": 86.449, + "args": { + "External id": 990665,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345941226155.412, "dur": 321.980, + "args": { + "External id": 990666,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 8649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941226267.856, "dur": 9.502, + "args": { + "External id": 990667,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941226279.829, "dur": 3.731, + "args": { + "External id": 990668,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941226284.843, "dur": 2.407, + "args": { + "External id": 990669,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941226288.270, "dur": 4.641, + "args": { + "External id": 990670,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345941226349.966, "dur": 5.978, + "args": { + "External id": 990671,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941226352.166, "dur": 3.585, + "args": { + "External id": 990672,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345941226358.216, "dur": 43.669, + "args": { + "External id": 990673,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941226370.398, "dur": 1.991, + "args": { + "External id": 990674,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345941226406.569, "dur": 1.710, + "args": { + "External id": 990675,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941226407.373, "dur": 0.779, + "args": { + "External id": 990676,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345941226409.563, "dur": 18.331, + "args": { + "External id": 990677,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941226412.152, "dur": 0.798, + "args": { + "External id": 990678,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345941226525.747, "dur": 36.084, + "args": { + "External id": 990679,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345941226582.108, "dur": 20.146, + "args": { + "External id": 990680,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941226611.027, "dur": 55.007, + "args": { + "External id": 990681,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941226673.486, "dur": 45.709, + "args": { + "External id": 990682,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941226727.473, "dur": 26.350, + "args": { + "External id": 990683,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941226762.019, "dur": 35.854, + "args": { + "External id": 990684,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941226806.226, "dur": 31.874, + "args": { + "External id": 990685,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941226845.072, "dur": 34.059, + "args": { + "External id": 990686,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345941226904.095, "dur": 27.762, + "args": { + "External id": 990687,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 8670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345941226953.010, "dur": 27.477, + "args": { + "External id": 990688,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345941227001.228, "dur": 42.412, + "args": { + "External id": 990689,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345941227106.572, "dur": 21.650, + "args": { + "External id": 990690,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345941227149.514, "dur": 19.087, + "args": { + "External id": 990691,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 8674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941227254.900, "dur": 18.039, + "args": { + "External id": 990692,"Record function id": 0, "Ev Idx": 8675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941227259.016, "dur": 12.641, + "args": { + "External id": 990693,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941227264.148, "dur": 6.354, + "args": { + "External id": 990694,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941227266.168, "dur": 4.233, + "args": { + "External id": 990695,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941227277.516, "dur": 5.770, + "args": { + "External id": 990696,"Record function id": 0, "Ev Idx": 8679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941227279.331, "dur": 3.424, + "args": { + "External id": 990697,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941227280.202, "dur": 1.972, + "args": { + "External id": 990698,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941227281.268, "dur": 0.778, + "args": { + "External id": 990699,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941227287.128, "dur": 4.922, + "args": { + "External id": 990700,"Record function id": 0, "Ev Idx": 8683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941227288.396, "dur": 3.155, + "args": { + "External id": 990701,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941227289.345, "dur": 1.663, + "args": { + "External id": 990702,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941227290.031, "dur": 0.825, + "args": { + "External id": 990703,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941227295.827, "dur": 4.933, + "args": { + "External id": 990704,"Record function id": 0, "Ev Idx": 8687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941227297.374, "dur": 2.887, + "args": { + "External id": 990705,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941227298.204, "dur": 1.569, + "args": { + "External id": 990706,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941227298.849, "dur": 0.817, + "args": { + "External id": 990707,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941227304.380, "dur": 9.181, + "args": { + "External id": 990708,"Record function id": 0, "Ev Idx": 8691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941227310.381, "dur": 2.698, + "args": { + "External id": 990709,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941227311.096, "dur": 1.452, + "args": { + "External id": 990710,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941227311.801, "dur": 0.673, + "args": { + "External id": 990711,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941227317.441, "dur": 9.644, + "args": { + "External id": 990712,"Record function id": 0, "Ev Idx": 8695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941227318.755, "dur": 7.834, + "args": { + "External id": 990713,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941227319.443, "dur": 6.652, + "args": { + "External id": 990714,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941227322.460, "dur": 3.503, + "args": { + "External id": 990715,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941227330.864, "dur": 4.815, + "args": { + "External id": 990716,"Record function id": 0, "Ev Idx": 8699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941227332.136, "dur": 3.061, + "args": { + "External id": 990717,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941227333.010, "dur": 1.711, + "args": { + "External id": 990718,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941227333.434, "dur": 1.181, + "args": { + "External id": 990719,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941227339.396, "dur": 40.840, + "args": { + "External id": 990720,"Record function id": 0, "Ev Idx": 8703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941227376.802, "dur": 2.782, + "args": { + "External id": 990721,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941227377.569, "dur": 1.494, + "args": { + "External id": 990722,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941227378.135, "dur": 0.846, + "args": { + "External id": 990723,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941227384.483, "dur": 6.713, + "args": { + "External id": 990724,"Record function id": 0, "Ev Idx": 8707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941227385.792, "dur": 4.813, + "args": { + "External id": 990725,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941227386.405, "dur": 3.696, + "args": { + "External id": 990726,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941227389.215, "dur": 0.786, + "args": { + "External id": 990727,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941227396.019, "dur": 63225.296, + "args": { + "External id": 990728,"Record function id": 0, "Sequence number": 10552508, "Fwd thread id": 1, "Ev Idx": 8711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941227397.694, "dur": 63211.423, + "args": { + "External id": 990729,"Sequence number": 10552508, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8712 + } + }, + { + "ph": "f", "id": 411, "pid": 2338710, "tid": 2379450, "ts": 6345941227397.694, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.18)", "pid": 2338710, "tid": 2379450, + "ts": 6345941227437.500, "dur": 48.308, + "args": { + "External id": 990730,"Record function id": 0, "Ev Idx": 8713 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.18)", "pid": 2338710, "tid": 2379450, + "ts": 6345941227494.566, "dur": 72.824, + "args": { + "External id": 990731,"Record function id": 0, "Ev Idx": 8714 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.18)", "pid": 2338710, "tid": 2379450, + "ts": 6345941227574.396, "dur": 63024.750, + "args": { + "External id": 990732,"Record function id": 0, "Ev Idx": 8715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941227675.982, "dur": 12.194, + "args": { + "External id": 990733,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941227701.747, "dur": 5.659, + "args": { + "External id": 990734,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345941227723.447, "dur": 61663.841, + "args": { + "External id": 990735,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345941227739.436, "dur": 61631.129, + "args": { + "External id": 990736,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941227847.996, "dur": 20.448, + "args": { + "External id": 990737,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345941227893.021, "dur": 61419.056, + "args": { + "External id": 990738,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 8721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941227897.556, "dur": 61413.141, + "args": { + "External id": 990739,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 8722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941227902.664, "dur": 10.524, + "args": { + "External id": 990740,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941227915.727, "dur": 61388.158, + "args": { + "External id": 990741,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 8724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941289525.684, "dur": 16.543, + "args": { + "External id": 990742,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 8725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941289530.655, "dur": 11.137, + "args": { + "External id": 990743,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345941289583.185, "dur": 540.615, + "args": { + "External id": 990744,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 8727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941289626.485, "dur": 489.098, + "args": { + "External id": 990745,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8728, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345941289644.818, "dur": 461.535, + "args": { + "External id": 990746,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 8729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941290155.326, "dur": 3.105, + "args": { + "External id": 990747,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8730, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941290234.833, "dur": 9.097, + "args": { + "External id": 990748,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941290262.269, "dur": 48.085, + "args": { + "External id": 990749,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941290323.554, "dur": 4.939, + "args": { + "External id": 990750,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941290334.857, "dur": 16.190, + "args": { + "External id": 990751,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941290357.337, "dur": 1.213, + "args": { + "External id": 990752,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941290364.700, "dur": 15.873, + "args": { + "External id": 990753,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941290388.340, "dur": 1.107, + "args": { + "External id": 990754,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941290395.614, "dur": 13.913, + "args": { + "External id": 990755,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941290414.657, "dur": 1.005, + "args": { + "External id": 990756,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941290420.177, "dur": 13.295, + "args": { + "External id": 990757,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941290438.255, "dur": 1.659, + "args": { + "External id": 990758,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941290444.997, "dur": 12.732, + "args": { + "External id": 990759,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941290464.693, "dur": 1.028, + "args": { + "External id": 990760,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941290471.613, "dur": 13.578, + "args": { + "External id": 990761,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941290490.027, "dur": 0.883, + "args": { + "External id": 990762,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941290495.267, "dur": 12.738, + "args": { + "External id": 990763,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941290512.363, "dur": 0.971, + "args": { + "External id": 990764,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941290517.688, "dur": 13.224, + "args": { + "External id": 990765,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 8748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941290640.294, "dur": 3314.567, + "args": { + "External id": 990766,"Record function id": 0, "Ev Idx": 8749 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.17)", "pid": 2338710, "tid": 2379450, + "ts": 6345941290666.090, "dur": 1239.400, + "args": { + "External id": 990767,"Record function id": 0, "Ev Idx": 8750 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.17)", "pid": 2338710, "tid": 2379450, + "ts": 6345941290685.281, "dur": 446.637, + "args": { + "External id": 990768,"Record function id": 0, "Ev Idx": 8751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941290780.371, "dur": 8.643, + "args": { + "External id": 990769,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941290793.763, "dur": 1.188, + "args": { + "External id": 990770,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941290796.905, "dur": 0.902, + "args": { + "External id": 990771,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941290800.192, "dur": 1.014, + "args": { + "External id": 990772,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941290810.675, "dur": 0.911, + "args": { + "External id": 990773,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941290814.562, "dur": 1.154, + "args": { + "External id": 990774,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941290817.288, "dur": 1.147, + "args": { + "External id": 990775,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941290823.022, "dur": 1.043, + "args": { + "External id": 990776,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941290825.832, "dur": 3.114, + "args": { + "External id": 990777,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941290830.642, "dur": 1.020, + "args": { + "External id": 990778,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941290851.877, "dur": 199.612, + "args": { + "External id": 990779,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941290872.102, "dur": 173.168, + "args": { + "External id": 990780,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941290891.898, "dur": 16.624, + "args": { + "External id": 990781,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345941290913.649, "dur": 78.044, + "args": { + "External id": 990782,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 8765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941290916.749, "dur": 74.474, + "args": { + "External id": 990783,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 8766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941290921.504, "dur": 6.469, + "args": { + "External id": 990784,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941290929.935, "dur": 60.611, + "args": { + "External id": 990785,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 8768 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.16", "pid": 2338710, "tid": 2379450, + "ts": 6345941291227.185, "dur": 669.098, + "args": { + "External id": 990786,"Record function id": 0, "Ev Idx": 8769 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.16)", "pid": 2338710, "tid": 2379450, + "ts": 6345941291247.938, "dur": 634.500, + "args": { + "External id": 990787,"Record function id": 0, "Ev Idx": 8770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941291313.764, "dur": 8.281, + "args": { + "External id": 990788,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345941291340.662, "dur": 32.965, + "args": { + "External id": 990789,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941291346.757, "dur": 1.906, + "args": { + "External id": 990790,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941291350.908, "dur": 1.875, + "args": { + "External id": 990791,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941291354.227, "dur": 0.658, + "args": { + "External id": 990792,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941291356.251, "dur": 2.431, + "args": { + "External id": 990793,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941291361.468, "dur": 0.384, + "args": { + "External id": 990794,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941291362.907, "dur": 0.527, + "args": { + "External id": 990795,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941291364.403, "dur": 0.600, + "args": { + "External id": 990796,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941291366.911, "dur": 0.834, + "args": { + "External id": 990797,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941291369.283, "dur": 0.695, + "args": { + "External id": 990798,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941291385.613, "dur": 51.571, + "args": { + "External id": 990799,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345941291472.585, "dur": 131.601, + "args": { + "External id": 990800,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 8783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941291489.077, "dur": 4.216, + "args": { + "External id": 990801,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345941291499.385, "dur": 11.132, + "args": { + "External id": 990802,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345941291504.130, "dur": 5.902, + "args": { + "External id": 990803,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 8786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941291507.777, "dur": 0.720, + "args": { + "External id": 990804,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345941291518.383, "dur": 31.772, + "args": { + "External id": 990805,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941291520.530, "dur": 2.740, + "args": { + "External id": 990806,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941291525.948, "dur": 0.356, + "args": { + "External id": 990807,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941291528.027, "dur": 0.602, + "args": { + "External id": 990808,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941291529.857, "dur": 1.644, + "args": { + "External id": 990809,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941291532.971, "dur": 0.570, + "args": { + "External id": 990810,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941291534.717, "dur": 0.661, + "args": { + "External id": 990811,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941291537.485, "dur": 0.430, + "args": { + "External id": 990812,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941291539.189, "dur": 0.362, + "args": { + "External id": 990813,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941291540.405, "dur": 2.966, + "args": { + "External id": 990814,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941291560.840, "dur": 34.693, + "args": { + "External id": 990815,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345941291657.552, "dur": 143.007, + "args": { + "External id": 990816,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 8799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941291693.518, "dur": 103.304, + "args": { + "External id": 990817,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8800, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345941291706.271, "dur": 85.834, + "args": { + "External id": 990818,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 8801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941291824.064, "dur": 2.283, + "args": { + "External id": 990819,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8802, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941291913.712, "dur": 2017.211, + "args": { + "External id": 990820,"Sequence number": 10552507, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8803 + } + }, + { + "ph": "f", "id": 412, "pid": 2338710, "tid": 2379450, "ts": 6345941291913.712, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941292102.906, "dur": 130.023, + "args": { + "External id": 990821,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 8804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345941292289.082, "dur": 48.158, + "args": { + "External id": 990822,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 8805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345941292359.738, "dur": 58.810, + "args": { + "External id": 990823,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 8806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941292429.942, "dur": 37.200, + "args": { + "External id": 990824,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941292474.448, "dur": 38.530, + "args": { + "External id": 990825,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941292520.307, "dur": 31.544, + "args": { + "External id": 990826,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941292562.256, "dur": 33.544, + "args": { + "External id": 990827,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345941292623.967, "dur": 29.659, + "args": { + "External id": 990828,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 8811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345941292674.908, "dur": 32.647, + "args": { + "External id": 990829,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345941292733.438, "dur": 21.844, + "args": { + "External id": 990830,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345941292771.328, "dur": 15.090, + "args": { + "External id": 990831,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941292796.621, "dur": 40.595, + "args": { + "External id": 990832,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941292841.757, "dur": 36.191, + "args": { + "External id": 990833,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345941292911.338, "dur": 388.945, + "args": { + "External id": 990834,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 8817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941293045.581, "dur": 53.983, + "args": { + "External id": 990835,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941293103.882, "dur": 4.116, + "args": { + "External id": 990836,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941293109.137, "dur": 2.708, + "args": { + "External id": 990837,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941293113.259, "dur": 4.870, + "args": { + "External id": 990838,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345941293174.822, "dur": 6.160, + "args": { + "External id": 990839,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941293177.385, "dur": 3.356, + "args": { + "External id": 990840,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345941293183.402, "dur": 42.049, + "args": { + "External id": 990841,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941293190.538, "dur": 2.141, + "args": { + "External id": 990842,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345941293227.502, "dur": 2.567, + "args": { + "External id": 990843,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941293229.088, "dur": 0.891, + "args": { + "External id": 990844,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345941293231.194, "dur": 17.308, + "args": { + "External id": 990845,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941293234.007, "dur": 0.822, + "args": { + "External id": 990846,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345941293344.716, "dur": 33.537, + "args": { + "External id": 990847,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345941293398.670, "dur": 18.514, + "args": { + "External id": 990848,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941293426.059, "dur": 58.283, + "args": { + "External id": 990849,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941293491.666, "dur": 45.618, + "args": { + "External id": 990850,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941293546.043, "dur": 23.889, + "args": { + "External id": 990851,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941293579.303, "dur": 34.607, + "args": { + "External id": 990852,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941293621.422, "dur": 30.676, + "args": { + "External id": 990853,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941293660.463, "dur": 34.207, + "args": { + "External id": 990854,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345941293718.184, "dur": 26.841, + "args": { + "External id": 990855,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 8838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345941293765.810, "dur": 26.736, + "args": { + "External id": 990856,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345941293809.788, "dur": 19.530, + "args": { + "External id": 990857,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345941293846.312, "dur": 15.136, + "args": { + "External id": 990858,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345941293875.749, "dur": 18.273, + "args": { + "External id": 990859,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 8842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941293984.474, "dur": 21.748, + "args": { + "External id": 990860,"Record function id": 0, "Ev Idx": 8843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941293988.438, "dur": 16.576, + "args": { + "External id": 990861,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941293997.289, "dur": 6.631, + "args": { + "External id": 990862,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941293999.436, "dur": 4.329, + "args": { + "External id": 990863,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941294034.396, "dur": 12.811, + "args": { + "External id": 990864,"Record function id": 0, "Ev Idx": 8847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941294036.484, "dur": 9.980, + "args": { + "External id": 990865,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941294042.071, "dur": 3.221, + "args": { + "External id": 990866,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941294043.045, "dur": 1.989, + "args": { + "External id": 990867,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941294051.337, "dur": 46.301, + "args": { + "External id": 990868,"Record function id": 0, "Ev Idx": 8851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941294090.976, "dur": 5.795, + "args": { + "External id": 990869,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941294092.558, "dur": 3.085, + "args": { + "External id": 990870,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941294093.552, "dur": 1.786, + "args": { + "External id": 990871,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941294106.430, "dur": 4.624, + "args": { + "External id": 990872,"Record function id": 0, "Ev Idx": 8855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941294107.853, "dur": 2.740, + "args": { + "External id": 990873,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941294108.656, "dur": 1.453, + "args": { + "External id": 990874,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941294109.226, "dur": 0.800, + "args": { + "External id": 990875,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941294114.903, "dur": 4.710, + "args": { + "External id": 990876,"Record function id": 0, "Ev Idx": 8859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941294116.330, "dur": 2.828, + "args": { + "External id": 990877,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941294116.953, "dur": 1.640, + "args": { + "External id": 990878,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941294117.771, "dur": 0.741, + "args": { + "External id": 990879,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941294123.429, "dur": 7.524, + "args": { + "External id": 990880,"Record function id": 0, "Ev Idx": 8863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941294125.105, "dur": 5.333, + "args": { + "External id": 990881,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941294125.734, "dur": 4.173, + "args": { + "External id": 990882,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941294126.221, "dur": 3.584, + "args": { + "External id": 990883,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941294134.693, "dur": 4.037, + "args": { + "External id": 990884,"Record function id": 0, "Ev Idx": 8867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941294135.802, "dur": 2.410, + "args": { + "External id": 990885,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941294136.428, "dur": 1.317, + "args": { + "External id": 990886,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941294136.804, "dur": 0.861, + "args": { + "External id": 990887,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941294142.328, "dur": 6.603, + "args": { + "External id": 990888,"Record function id": 0, "Ev Idx": 8871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941294143.473, "dur": 4.976, + "args": { + "External id": 990889,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941294144.082, "dur": 3.501, + "args": { + "External id": 990890,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941294146.778, "dur": 0.680, + "args": { + "External id": 990891,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941294153.399, "dur": 4.562, + "args": { + "External id": 990892,"Record function id": 0, "Ev Idx": 8875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941294154.832, "dur": 2.662, + "args": { + "External id": 990893,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941294155.425, "dur": 1.577, + "args": { + "External id": 990894,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941294155.823, "dur": 1.057, + "args": { + "External id": 990895,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941294162.752, "dur": 61514.915, + "args": { + "External id": 990896,"Record function id": 0, "Sequence number": 10552506, "Fwd thread id": 1, "Ev Idx": 8879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941294164.314, "dur": 61502.273, + "args": { + "External id": 990897,"Sequence number": 10552506, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8880 + } + }, + { + "ph": "f", "id": 413, "pid": 2338710, "tid": 2379450, "ts": 6345941294164.314, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.17)", "pid": 2338710, "tid": 2379450, + "ts": 6345941294204.538, "dur": 46.331, + "args": { + "External id": 990898,"Record function id": 0, "Ev Idx": 8881 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.17)", "pid": 2338710, "tid": 2379450, + "ts": 6345941294260.076, "dur": 72.507, + "args": { + "External id": 990899,"Record function id": 0, "Ev Idx": 8882 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.17)", "pid": 2338710, "tid": 2379450, + "ts": 6345941294338.895, "dur": 61317.703, + "args": { + "External id": 990900,"Record function id": 0, "Ev Idx": 8883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941294446.873, "dur": 8.612, + "args": { + "External id": 990901,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941294466.773, "dur": 5.973, + "args": { + "External id": 990902,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345941294491.112, "dur": 59957.499, + "args": { + "External id": 990903,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345941294507.017, "dur": 59925.183, + "args": { + "External id": 990904,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941294609.147, "dur": 21.480, + "args": { + "External id": 990905,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345941294655.369, "dur": 59724.062, + "args": { + "External id": 990906,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 8889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941294660.527, "dur": 59717.671, + "args": { + "External id": 990907,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 8890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941294670.773, "dur": 9.567, + "args": { + "External id": 990908,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941294683.010, "dur": 59688.156, + "args": { + "External id": 990909,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 8892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941354590.453, "dur": 16.625, + "args": { + "External id": 990910,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 8893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941354595.294, "dur": 11.302, + "args": { + "External id": 990911,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345941354649.928, "dur": 544.620, + "args": { + "External id": 990912,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 8895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941354694.401, "dur": 492.288, + "args": { + "External id": 990913,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8896, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345941354710.643, "dur": 467.910, + "args": { + "External id": 990914,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 8897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941355225.087, "dur": 2.671, + "args": { + "External id": 990915,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8898, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941355306.619, "dur": 9.325, + "args": { + "External id": 990916,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941355334.362, "dur": 40.443, + "args": { + "External id": 990917,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941355387.406, "dur": 4.437, + "args": { + "External id": 990918,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941355398.126, "dur": 15.826, + "args": { + "External id": 990919,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941355420.319, "dur": 0.949, + "args": { + "External id": 990920,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941355427.952, "dur": 15.240, + "args": { + "External id": 990921,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941355448.566, "dur": 0.993, + "args": { + "External id": 990922,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941355454.898, "dur": 12.998, + "args": { + "External id": 990923,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941355472.723, "dur": 1.276, + "args": { + "External id": 990924,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941355478.286, "dur": 13.167, + "args": { + "External id": 990925,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941355495.826, "dur": 0.989, + "args": { + "External id": 990926,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941355502.759, "dur": 12.757, + "args": { + "External id": 990927,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941355520.487, "dur": 0.888, + "args": { + "External id": 990928,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941355525.979, "dur": 12.951, + "args": { + "External id": 990929,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941355543.182, "dur": 0.931, + "args": { + "External id": 990930,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941355548.599, "dur": 12.067, + "args": { + "External id": 990931,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941355567.362, "dur": 0.994, + "args": { + "External id": 990932,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941355573.571, "dur": 13.001, + "args": { + "External id": 990933,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 8916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941355695.508, "dur": 3446.505, + "args": { + "External id": 990934,"Record function id": 0, "Ev Idx": 8917 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.16)", "pid": 2338710, "tid": 2379450, + "ts": 6345941355720.479, "dur": 1235.819, + "args": { + "External id": 990935,"Record function id": 0, "Ev Idx": 8918 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.16)", "pid": 2338710, "tid": 2379450, + "ts": 6345941355740.913, "dur": 447.295, + "args": { + "External id": 990936,"Record function id": 0, "Ev Idx": 8919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941355833.737, "dur": 8.237, + "args": { + "External id": 990937,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941355846.527, "dur": 0.957, + "args": { + "External id": 990938,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941355850.094, "dur": 1.415, + "args": { + "External id": 990939,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941355853.406, "dur": 0.985, + "args": { + "External id": 990940,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941355857.028, "dur": 0.935, + "args": { + "External id": 990941,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941355859.548, "dur": 0.964, + "args": { + "External id": 990942,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941355862.183, "dur": 0.820, + "args": { + "External id": 990943,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941355866.914, "dur": 1.289, + "args": { + "External id": 990944,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941355869.667, "dur": 2.909, + "args": { + "External id": 990945,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941355874.045, "dur": 0.703, + "args": { + "External id": 990946,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941355895.302, "dur": 251.965, + "args": { + "External id": 990947,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941355917.069, "dur": 224.150, + "args": { + "External id": 990948,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941355936.346, "dur": 17.031, + "args": { + "External id": 990949,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345941355959.001, "dur": 145.296, + "args": { + "External id": 990950,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 8933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941355962.433, "dur": 141.342, + "args": { + "External id": 990951,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 8934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941355968.322, "dur": 7.112, + "args": { + "External id": 990952,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941355977.863, "dur": 124.350, + "args": { + "External id": 990953,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 8936 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.15", "pid": 2338710, "tid": 2379450, + "ts": 6345941356284.468, "dur": 663.037, + "args": { + "External id": 990954,"Record function id": 0, "Ev Idx": 8937 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.15)", "pid": 2338710, "tid": 2379450, + "ts": 6345941356303.559, "dur": 629.494, + "args": { + "External id": 990955,"Record function id": 0, "Ev Idx": 8938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941356372.170, "dur": 8.349, + "args": { + "External id": 990956,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345941356398.418, "dur": 32.971, + "args": { + "External id": 990957,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941356404.818, "dur": 2.101, + "args": { + "External id": 990958,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941356409.062, "dur": 0.781, + "args": { + "External id": 990959,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941356411.849, "dur": 0.478, + "args": { + "External id": 990960,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941356414.240, "dur": 2.408, + "args": { + "External id": 990961,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941356418.266, "dur": 0.563, + "args": { + "External id": 990962,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941356420.733, "dur": 0.489, + "args": { + "External id": 990963,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941356423.255, "dur": 0.577, + "args": { + "External id": 990964,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941356425.103, "dur": 0.373, + "args": { + "External id": 990965,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941356427.614, "dur": 0.516, + "args": { + "External id": 990966,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941356443.599, "dur": 48.989, + "args": { + "External id": 990967,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345941356532.444, "dur": 138.555, + "args": { + "External id": 990968,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 8951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941356546.349, "dur": 4.091, + "args": { + "External id": 990969,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345941356556.588, "dur": 12.411, + "args": { + "External id": 990970,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345941356561.787, "dur": 6.695, + "args": { + "External id": 990971,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 8954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941356566.207, "dur": 0.598, + "args": { + "External id": 990972,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345941356580.742, "dur": 30.337, + "args": { + "External id": 990973,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941356583.153, "dur": 3.030, + "args": { + "External id": 990974,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941356588.701, "dur": 0.415, + "args": { + "External id": 990975,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941356591.073, "dur": 0.367, + "args": { + "External id": 990976,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941356592.583, "dur": 0.593, + "args": { + "External id": 990977,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941356595.409, "dur": 0.454, + "args": { + "External id": 990978,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941356597.803, "dur": 0.562, + "args": { + "External id": 990979,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941356599.686, "dur": 0.731, + "args": { + "External id": 990980,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941356602.368, "dur": 0.380, + "args": { + "External id": 990981,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941356604.079, "dur": 2.970, + "args": { + "External id": 990982,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941356622.456, "dur": 40.243, + "args": { + "External id": 990983,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345941356722.026, "dur": 133.071, + "args": { + "External id": 990984,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 8967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941356751.656, "dur": 99.353, + "args": { + "External id": 990985,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8968, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345941356762.518, "dur": 83.625, + "args": { + "External id": 990986,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 8969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941356875.104, "dur": 2.395, + "args": { + "External id": 990987,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8970, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941356964.373, "dur": 2144.626, + "args": { + "External id": 990988,"Sequence number": 10552505, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8971 + } + }, + { + "ph": "f", "id": 414, "pid": 2338710, "tid": 2379450, "ts": 6345941356964.373, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941357158.398, "dur": 130.096, + "args": { + "External id": 990989,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 8972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345941357343.131, "dur": 49.945, + "args": { + "External id": 990990,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 8973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345941357416.042, "dur": 57.746, + "args": { + "External id": 990991,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 8974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941357485.637, "dur": 35.928, + "args": { + "External id": 990992,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941357529.757, "dur": 35.140, + "args": { + "External id": 990993,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941357571.920, "dur": 31.027, + "args": { + "External id": 990994,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941357613.679, "dur": 31.627, + "args": { + "External id": 990995,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345941357672.761, "dur": 27.213, + "args": { + "External id": 990996,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 8979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345941357723.090, "dur": 35.265, + "args": { + "External id": 990997,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345941357783.788, "dur": 23.862, + "args": { + "External id": 990998,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345941357825.204, "dur": 19.007, + "args": { + "External id": 990999,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941357857.411, "dur": 44.986, + "args": { + "External id": 991000,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941357906.499, "dur": 38.468, + "args": { + "External id": 991001,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345941357977.584, "dur": 400.377, + "args": { + "External id": 991002,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 8985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941358156.837, "dur": 11.730, + "args": { + "External id": 991003,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941358172.194, "dur": 3.790, + "args": { + "External id": 991004,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941358178.087, "dur": 6.112, + "args": { + "External id": 991005,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941358185.498, "dur": 4.581, + "args": { + "External id": 991006,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345941358249.732, "dur": 6.370, + "args": { + "External id": 991007,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941358252.082, "dur": 3.753, + "args": { + "External id": 991008,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345941358258.402, "dur": 42.750, + "args": { + "External id": 991009,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941358265.814, "dur": 2.047, + "args": { + "External id": 991010,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345941358303.293, "dur": 2.680, + "args": { + "External id": 991011,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941358304.887, "dur": 0.988, + "args": { + "External id": 991012,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345941358307.627, "dur": 17.105, + "args": { + "External id": 991013,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941358310.133, "dur": 0.874, + "args": { + "External id": 991014,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345941358421.179, "dur": 33.552, + "args": { + "External id": 991015,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345941358474.115, "dur": 18.292, + "args": { + "External id": 991016,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941358502.084, "dur": 59.871, + "args": { + "External id": 991017,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941358571.219, "dur": 46.807, + "args": { + "External id": 991018,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941358628.116, "dur": 25.888, + "args": { + "External id": 991019,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941358663.559, "dur": 37.825, + "args": { + "External id": 991020,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941358709.755, "dur": 34.059, + "args": { + "External id": 991021,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941358751.982, "dur": 35.446, + "args": { + "External id": 991022,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345941358808.397, "dur": 28.715, + "args": { + "External id": 991023,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 9006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345941358854.723, "dur": 28.838, + "args": { + "External id": 991024,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345941358908.610, "dur": 21.700, + "args": { + "External id": 991025,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345941358951.073, "dur": 17.457, + "args": { + "External id": 991026,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345941358984.743, "dur": 40.896, + "args": { + "External id": 991027,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 9010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941359168.262, "dur": 20.157, + "args": { + "External id": 991028,"Record function id": 0, "Ev Idx": 9011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941359173.494, "dur": 13.723, + "args": { + "External id": 991029,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941359178.832, "dur": 7.150, + "args": { + "External id": 991030,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941359180.931, "dur": 4.924, + "args": { + "External id": 991031,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941359193.400, "dur": 6.564, + "args": { + "External id": 991032,"Record function id": 0, "Ev Idx": 9015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941359195.400, "dur": 4.008, + "args": { + "External id": 991033,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941359196.169, "dur": 2.701, + "args": { + "External id": 991034,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941359197.324, "dur": 1.432, + "args": { + "External id": 991035,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941359203.843, "dur": 6.186, + "args": { + "External id": 991036,"Record function id": 0, "Ev Idx": 9019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941359205.548, "dur": 3.981, + "args": { + "External id": 991037,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941359206.317, "dur": 2.692, + "args": { + "External id": 991038,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941359207.404, "dur": 1.443, + "args": { + "External id": 991039,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941359213.673, "dur": 5.598, + "args": { + "External id": 991040,"Record function id": 0, "Ev Idx": 9023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941359215.174, "dur": 3.620, + "args": { + "External id": 991041,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941359215.719, "dur": 2.581, + "args": { + "External id": 991042,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941359216.641, "dur": 1.538, + "args": { + "External id": 991043,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941359223.059, "dur": 6.054, + "args": { + "External id": 991044,"Record function id": 0, "Ev Idx": 9027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941359224.711, "dur": 3.894, + "args": { + "External id": 991045,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941359225.382, "dur": 2.622, + "args": { + "External id": 991046,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941359226.640, "dur": 1.239, + "args": { + "External id": 991047,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941359232.960, "dur": 8.720, + "args": { + "External id": 991048,"Record function id": 0, "Ev Idx": 9031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941359234.679, "dur": 6.501, + "args": { + "External id": 991049,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941359235.497, "dur": 5.184, + "args": { + "External id": 991050,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941359236.592, "dur": 3.951, + "args": { + "External id": 991051,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941359245.523, "dur": 6.358, + "args": { + "External id": 991052,"Record function id": 0, "Ev Idx": 9035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941359247.244, "dur": 4.159, + "args": { + "External id": 991053,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941359248.184, "dur": 2.704, + "args": { + "External id": 991054,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941359249.452, "dur": 1.345, + "args": { + "External id": 991055,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941359256.504, "dur": 6.013, + "args": { + "External id": 991056,"Record function id": 0, "Ev Idx": 9039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941359258.290, "dur": 3.747, + "args": { + "External id": 991057,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941359258.911, "dur": 2.631, + "args": { + "External id": 991058,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941359260.052, "dur": 1.331, + "args": { + "External id": 991059,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941359266.934, "dur": 5.730, + "args": { + "External id": 991060,"Record function id": 0, "Ev Idx": 9043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941359268.513, "dur": 3.670, + "args": { + "External id": 991061,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941359269.035, "dur": 2.659, + "args": { + "External id": 991062,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941359270.229, "dur": 1.342, + "args": { + "External id": 991063,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941359277.416, "dur": 60942.857, + "args": { + "External id": 991064,"Record function id": 0, "Sequence number": 10552504, "Fwd thread id": 1, "Ev Idx": 9047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941359279.474, "dur": 60928.894, + "args": { + "External id": 991065,"Sequence number": 10552504, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9048 + } + }, + { + "ph": "f", "id": 415, "pid": 2338710, "tid": 2379450, "ts": 6345941359279.474, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.16)", "pid": 2338710, "tid": 2379450, + "ts": 6345941359316.934, "dur": 45.790, + "args": { + "External id": 991066,"Record function id": 0, "Ev Idx": 9049 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.16)", "pid": 2338710, "tid": 2379450, + "ts": 6345941359373.140, "dur": 69.965, + "args": { + "External id": 991067,"Record function id": 0, "Ev Idx": 9050 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.16)", "pid": 2338710, "tid": 2379450, + "ts": 6345941359451.181, "dur": 60746.865, + "args": { + "External id": 991068,"Record function id": 0, "Ev Idx": 9051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941359555.541, "dur": 7.957, + "args": { + "External id": 991069,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941359574.989, "dur": 5.998, + "args": { + "External id": 991070,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345941359596.785, "dur": 59435.002, + "args": { + "External id": 991071,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345941359616.809, "dur": 59386.095, + "args": { + "External id": 991072,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941359727.000, "dur": 20.160, + "args": { + "External id": 991073,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345941359771.722, "dur": 59170.798, + "args": { + "External id": 991074,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 9057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941359776.622, "dur": 59164.771, + "args": { + "External id": 991075,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 9058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941359782.823, "dur": 9.457, + "args": { + "External id": 991076,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941359795.299, "dur": 59139.282, + "args": { + "External id": 991077,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 9060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941419197.870, "dur": 17.062, + "args": { + "External id": 991078,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 9061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941419203.384, "dur": 10.672, + "args": { + "External id": 991079,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345941419253.896, "dur": 423.872, + "args": { + "External id": 991080,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 9063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941419292.358, "dur": 379.870, + "args": { + "External id": 991081,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9064, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345941419307.876, "dur": 357.344, + "args": { + "External id": 991082,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 9065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941419700.544, "dur": 2.575, + "args": { + "External id": 991083,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9066, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941419771.526, "dur": 9.043, + "args": { + "External id": 991084,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941419797.600, "dur": 42.229, + "args": { + "External id": 991085,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941419852.015, "dur": 5.074, + "args": { + "External id": 991086,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941419863.224, "dur": 15.600, + "args": { + "External id": 991087,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941419885.274, "dur": 0.995, + "args": { + "External id": 991088,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941419892.611, "dur": 13.610, + "args": { + "External id": 991089,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941419912.344, "dur": 0.952, + "args": { + "External id": 991090,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941419918.983, "dur": 12.420, + "args": { + "External id": 991091,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941419936.679, "dur": 1.028, + "args": { + "External id": 991092,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941419943.090, "dur": 13.277, + "args": { + "External id": 991093,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941419961.244, "dur": 1.189, + "args": { + "External id": 991094,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941419966.968, "dur": 12.194, + "args": { + "External id": 991095,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941419984.243, "dur": 1.140, + "args": { + "External id": 991096,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941419989.502, "dur": 13.261, + "args": { + "External id": 991097,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941420027.105, "dur": 3.368, + "args": { + "External id": 991098,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941420036.997, "dur": 55.585, + "args": { + "External id": 991099,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941420103.239, "dur": 1.837, + "args": { + "External id": 991100,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941420110.422, "dur": 14.347, + "args": { + "External id": 991101,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 9084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941420239.617, "dur": 3359.387, + "args": { + "External id": 991102,"Record function id": 0, "Ev Idx": 9085 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.15)", "pid": 2338710, "tid": 2379450, + "ts": 6345941420263.257, "dur": 1221.152, + "args": { + "External id": 991103,"Record function id": 0, "Ev Idx": 9086 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.15)", "pid": 2338710, "tid": 2379450, + "ts": 6345941420281.507, "dur": 362.489, + "args": { + "External id": 991104,"Record function id": 0, "Ev Idx": 9087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941420376.647, "dur": 7.504, + "args": { + "External id": 991105,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941420387.731, "dur": 1.425, + "args": { + "External id": 991106,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941420391.618, "dur": 1.177, + "args": { + "External id": 991107,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941420394.715, "dur": 0.909, + "args": { + "External id": 991108,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941420397.838, "dur": 0.831, + "args": { + "External id": 991109,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941420400.126, "dur": 0.892, + "args": { + "External id": 991110,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941420402.590, "dur": 0.869, + "args": { + "External id": 991111,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941420405.560, "dur": 2.087, + "args": { + "External id": 991112,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941420409.551, "dur": 2.920, + "args": { + "External id": 991113,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941420413.949, "dur": 0.760, + "args": { + "External id": 991114,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941420437.576, "dur": 173.491, + "args": { + "External id": 991115,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941420458.395, "dur": 147.153, + "args": { + "External id": 991116,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941420476.549, "dur": 17.643, + "args": { + "External id": 991117,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345941420500.024, "dur": 74.382, + "args": { + "External id": 991118,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 9101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941420503.356, "dur": 70.683, + "args": { + "External id": 991119,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 9102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941420508.226, "dur": 6.038, + "args": { + "External id": 991120,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941420516.506, "dur": 56.455, + "args": { + "External id": 991121,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 9104 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.14", "pid": 2338710, "tid": 2379450, + "ts": 6345941420733.465, "dur": 741.949, + "args": { + "External id": 991122,"Record function id": 0, "Ev Idx": 9105 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.14)", "pid": 2338710, "tid": 2379450, + "ts": 6345941420752.774, "dur": 707.608, + "args": { + "External id": 991123,"Record function id": 0, "Ev Idx": 9106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941420820.436, "dur": 6.061, + "args": { + "External id": 991124,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345941420844.069, "dur": 31.807, + "args": { + "External id": 991125,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941420850.160, "dur": 1.630, + "args": { + "External id": 991126,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941420853.630, "dur": 0.752, + "args": { + "External id": 991127,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941420856.481, "dur": 0.572, + "args": { + "External id": 991128,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941420859.256, "dur": 2.629, + "args": { + "External id": 991129,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941420863.220, "dur": 0.426, + "args": { + "External id": 991130,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941420865.479, "dur": 0.473, + "args": { + "External id": 991131,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941420867.661, "dur": 0.498, + "args": { + "External id": 991132,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941420869.367, "dur": 0.460, + "args": { + "External id": 991133,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941420871.581, "dur": 0.624, + "args": { + "External id": 991134,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941420887.465, "dur": 49.406, + "args": { + "External id": 991135,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345941420972.343, "dur": 211.275, + "args": { + "External id": 991136,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 9119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941420983.553, "dur": 3.471, + "args": { + "External id": 991137,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345941420993.069, "dur": 11.680, + "args": { + "External id": 991138,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345941420997.933, "dur": 6.320, + "args": { + "External id": 991139,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 9122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941421001.973, "dur": 0.770, + "args": { + "External id": 991140,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345941421036.315, "dur": 81.845, + "args": { + "External id": 991141,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941421048.759, "dur": 40.306, + "args": { + "External id": 991142,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941421094.109, "dur": 0.754, + "args": { + "External id": 991143,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941421096.959, "dur": 0.640, + "args": { + "External id": 991144,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941421099.119, "dur": 0.665, + "args": { + "External id": 991145,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941421101.890, "dur": 0.436, + "args": { + "External id": 991146,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941421104.206, "dur": 0.633, + "args": { + "External id": 991147,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941421106.422, "dur": 0.650, + "args": { + "External id": 991148,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941421108.895, "dur": 0.442, + "args": { + "External id": 991149,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941421111.535, "dur": 2.798, + "args": { + "External id": 991150,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941421133.425, "dur": 40.872, + "args": { + "External id": 991151,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345941421238.514, "dur": 143.129, + "args": { + "External id": 991152,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 9135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941421273.729, "dur": 103.534, + "args": { + "External id": 991153,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9136, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345941421285.722, "dur": 86.753, + "args": { + "External id": 991154,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 9137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941421399.503, "dur": 2.043, + "args": { + "External id": 991155,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9138, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941421492.415, "dur": 2080.495, + "args": { + "External id": 991156,"Sequence number": 10552503, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9139 + } + }, + { + "ph": "f", "id": 416, "pid": 2338710, "tid": 2379450, "ts": 6345941421492.415, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941421621.078, "dur": 130.711, + "args": { + "External id": 991157,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 9140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345941421798.419, "dur": 43.181, + "args": { + "External id": 991158,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 9141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345941421863.719, "dur": 56.596, + "args": { + "External id": 991159,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 9142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941421930.983, "dur": 35.092, + "args": { + "External id": 991160,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941421973.069, "dur": 58.234, + "args": { + "External id": 991161,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941422044.603, "dur": 78.840, + "args": { + "External id": 991162,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941422139.153, "dur": 36.674, + "args": { + "External id": 991163,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345941422207.314, "dur": 28.334, + "args": { + "External id": 991164,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 9147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345941422260.574, "dur": 33.692, + "args": { + "External id": 991165,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345941422317.257, "dur": 21.178, + "args": { + "External id": 991166,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345941422354.536, "dur": 16.776, + "args": { + "External id": 991167,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941422383.478, "dur": 42.840, + "args": { + "External id": 991168,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941422430.447, "dur": 36.593, + "args": { + "External id": 991169,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345941422500.750, "dur": 318.322, + "args": { + "External id": 991170,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 9153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941422595.517, "dur": 7.430, + "args": { + "External id": 991171,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941422606.121, "dur": 3.043, + "args": { + "External id": 991172,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941422610.904, "dur": 13.132, + "args": { + "External id": 991173,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941422629.185, "dur": 5.384, + "args": { + "External id": 991174,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345941422686.027, "dur": 6.392, + "args": { + "External id": 991175,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941422688.592, "dur": 3.546, + "args": { + "External id": 991176,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345941422694.842, "dur": 37.395, + "args": { + "External id": 991177,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941422701.884, "dur": 1.900, + "args": { + "External id": 991178,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345941422737.851, "dur": 2.754, + "args": { + "External id": 991179,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941422739.518, "dur": 0.949, + "args": { + "External id": 991180,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345941422741.961, "dur": 17.177, + "args": { + "External id": 991181,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941422744.726, "dur": 0.655, + "args": { + "External id": 991182,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345941422857.915, "dur": 36.837, + "args": { + "External id": 991183,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345941422913.968, "dur": 18.813, + "args": { + "External id": 991184,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941422944.968, "dur": 49.362, + "args": { + "External id": 991185,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941423002.558, "dur": 117.775, + "args": { + "External id": 991186,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941423136.271, "dur": 32.966, + "args": { + "External id": 991187,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941423179.498, "dur": 41.302, + "args": { + "External id": 991188,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941423229.628, "dur": 34.043, + "args": { + "External id": 991189,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941423286.638, "dur": 36.344, + "args": { + "External id": 991190,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345941423347.460, "dur": 29.772, + "args": { + "External id": 991191,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 9174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345941423396.171, "dur": 29.279, + "args": { + "External id": 991192,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345941423442.533, "dur": 20.950, + "args": { + "External id": 991193,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345941423480.970, "dur": 17.344, + "args": { + "External id": 991194,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345941423514.594, "dur": 19.737, + "args": { + "External id": 991195,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 9178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941423625.075, "dur": 18.843, + "args": { + "External id": 991196,"Record function id": 0, "Ev Idx": 9179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941423629.397, "dur": 13.224, + "args": { + "External id": 991197,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941423634.477, "dur": 6.933, + "args": { + "External id": 991198,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941423636.454, "dur": 4.816, + "args": { + "External id": 991199,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941423648.626, "dur": 6.349, + "args": { + "External id": 991200,"Record function id": 0, "Ev Idx": 9183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941423650.186, "dur": 4.221, + "args": { + "External id": 991201,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941423651.270, "dur": 2.591, + "args": { + "External id": 991202,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941423652.279, "dur": 1.442, + "args": { + "External id": 991203,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941423659.012, "dur": 5.747, + "args": { + "External id": 991204,"Record function id": 0, "Ev Idx": 9187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941423660.604, "dur": 3.690, + "args": { + "External id": 991205,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941423661.209, "dur": 2.598, + "args": { + "External id": 991206,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941423662.253, "dur": 1.453, + "args": { + "External id": 991207,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941423668.669, "dur": 5.818, + "args": { + "External id": 991208,"Record function id": 0, "Ev Idx": 9191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941423670.221, "dur": 3.784, + "args": { + "External id": 991209,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941423670.805, "dur": 2.720, + "args": { + "External id": 991210,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941423671.876, "dur": 1.564, + "args": { + "External id": 991211,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941423678.201, "dur": 6.226, + "args": { + "External id": 991212,"Record function id": 0, "Ev Idx": 9195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941423679.809, "dur": 4.151, + "args": { + "External id": 991213,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941423680.499, "dur": 2.927, + "args": { + "External id": 991214,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941423681.797, "dur": 1.542, + "args": { + "External id": 991215,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941423688.037, "dur": 8.533, + "args": { + "External id": 991216,"Record function id": 0, "Ev Idx": 9199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941423689.670, "dur": 6.423, + "args": { + "External id": 991217,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941423690.431, "dur": 5.161, + "args": { + "External id": 991218,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941423691.428, "dur": 4.077, + "args": { + "External id": 991219,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941423700.357, "dur": 6.039, + "args": { + "External id": 991220,"Record function id": 0, "Ev Idx": 9203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941423701.936, "dur": 3.975, + "args": { + "External id": 991221,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941423702.827, "dur": 2.603, + "args": { + "External id": 991222,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941423703.834, "dur": 1.505, + "args": { + "External id": 991223,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941423710.756, "dur": 5.377, + "args": { + "External id": 991224,"Record function id": 0, "Ev Idx": 9207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941423712.340, "dur": 3.333, + "args": { + "External id": 991225,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941423712.923, "dur": 2.263, + "args": { + "External id": 991226,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941423713.886, "dur": 1.210, + "args": { + "External id": 991227,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941423720.184, "dur": 5.416, + "args": { + "External id": 991228,"Record function id": 0, "Ev Idx": 9211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941423721.853, "dur": 3.288, + "args": { + "External id": 991229,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941423722.440, "dur": 2.131, + "args": { + "External id": 991230,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941423723.220, "dur": 1.261, + "args": { + "External id": 991231,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941423730.531, "dur": 60822.541, + "args": { + "External id": 991232,"Record function id": 0, "Sequence number": 10552502, "Fwd thread id": 1, "Ev Idx": 9215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941423766.421, "dur": 60775.232, + "args": { + "External id": 991233,"Sequence number": 10552502, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9216 + } + }, + { + "ph": "f", "id": 417, "pid": 2338710, "tid": 2379450, "ts": 6345941423766.421, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.15)", "pid": 2338710, "tid": 2379450, + "ts": 6345941423810.251, "dur": 47.483, + "args": { + "External id": 991234,"Record function id": 0, "Ev Idx": 9217 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.15)", "pid": 2338710, "tid": 2379450, + "ts": 6345941423868.231, "dur": 72.399, + "args": { + "External id": 991235,"Record function id": 0, "Ev Idx": 9218 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.15)", "pid": 2338710, "tid": 2379450, + "ts": 6345941423948.231, "dur": 60582.433, + "args": { + "External id": 991236,"Record function id": 0, "Ev Idx": 9219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941424120.413, "dur": 11.022, + "args": { + "External id": 991237,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941424145.678, "dur": 6.126, + "args": { + "External id": 991238,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345941424170.007, "dur": 59198.386, + "args": { + "External id": 991239,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345941424187.187, "dur": 59165.645, + "args": { + "External id": 991240,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941424301.729, "dur": 32.615, + "args": { + "External id": 991241,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345941424362.622, "dur": 58939.390, + "args": { + "External id": 991242,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 9225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941424366.088, "dur": 58934.958, + "args": { + "External id": 991243,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 9226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941424372.206, "dur": 15.839, + "args": { + "External id": 991244,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941424392.872, "dur": 58905.911, + "args": { + "External id": 991245,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 9228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941483496.810, "dur": 14.063, + "args": { + "External id": 991246,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 9229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941483501.210, "dur": 9.208, + "args": { + "External id": 991247,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345941483548.816, "dur": 424.866, + "args": { + "External id": 991248,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 9231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941483586.462, "dur": 380.440, + "args": { + "External id": 991249,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9232, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345941483602.537, "dur": 357.178, + "args": { + "External id": 991250,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 9233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941483999.385, "dur": 3.673, + "args": { + "External id": 991251,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9234, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941484150.254, "dur": 9.328, + "args": { + "External id": 991252,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941484178.536, "dur": 47.182, + "args": { + "External id": 991253,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941484238.871, "dur": 5.454, + "args": { + "External id": 991254,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941484252.659, "dur": 34.256, + "args": { + "External id": 991255,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941484294.090, "dur": 1.018, + "args": { + "External id": 991256,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941484300.936, "dur": 13.870, + "args": { + "External id": 991257,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941484320.379, "dur": 0.921, + "args": { + "External id": 991258,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941484326.982, "dur": 13.624, + "args": { + "External id": 991259,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941484345.412, "dur": 1.015, + "args": { + "External id": 991260,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941484350.993, "dur": 12.098, + "args": { + "External id": 991261,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941484367.547, "dur": 1.170, + "args": { + "External id": 991262,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941484373.176, "dur": 12.307, + "args": { + "External id": 991263,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941484390.096, "dur": 1.145, + "args": { + "External id": 991264,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941484395.649, "dur": 13.044, + "args": { + "External id": 991265,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941484415.627, "dur": 1.328, + "args": { + "External id": 991266,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941484422.273, "dur": 12.226, + "args": { + "External id": 991267,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941484438.782, "dur": 1.071, + "args": { + "External id": 991268,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941484443.854, "dur": 14.497, + "args": { + "External id": 991269,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 9252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941484575.175, "dur": 3416.131, + "args": { + "External id": 991270,"Record function id": 0, "Ev Idx": 9253 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.14)", "pid": 2338710, "tid": 2379450, + "ts": 6345941484600.223, "dur": 1259.345, + "args": { + "External id": 991271,"Record function id": 0, "Ev Idx": 9254 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.14)", "pid": 2338710, "tid": 2379450, + "ts": 6345941484622.140, "dur": 384.766, + "args": { + "External id": 991272,"Record function id": 0, "Ev Idx": 9255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941484718.627, "dur": 8.521, + "args": { + "External id": 991273,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941484732.575, "dur": 1.310, + "args": { + "External id": 991274,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941484736.424, "dur": 1.230, + "args": { + "External id": 991275,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941484739.486, "dur": 0.915, + "args": { + "External id": 991276,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941484742.620, "dur": 0.830, + "args": { + "External id": 991277,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941484745.050, "dur": 1.000, + "args": { + "External id": 991278,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941484747.709, "dur": 1.085, + "args": { + "External id": 991279,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941484750.719, "dur": 2.210, + "args": { + "External id": 991280,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941484756.943, "dur": 3.273, + "args": { + "External id": 991281,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941484761.748, "dur": 0.731, + "args": { + "External id": 991282,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941484783.908, "dur": 185.923, + "args": { + "External id": 991283,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941484806.215, "dur": 157.075, + "args": { + "External id": 991284,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941484825.797, "dur": 19.111, + "args": { + "External id": 991285,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345941484850.666, "dur": 79.221, + "args": { + "External id": 991286,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 9269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941484854.085, "dur": 75.316, + "args": { + "External id": 991287,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 9270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941484860.290, "dur": 7.255, + "args": { + "External id": 991288,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941484870.068, "dur": 58.591, + "args": { + "External id": 991289,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 9272 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.13", "pid": 2338710, "tid": 2379450, + "ts": 6345941485178.387, "dur": 672.135, + "args": { + "External id": 991290,"Record function id": 0, "Ev Idx": 9273 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.13)", "pid": 2338710, "tid": 2379450, + "ts": 6345941485202.210, "dur": 633.929, + "args": { + "External id": 991291,"Record function id": 0, "Ev Idx": 9274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941485274.942, "dur": 8.073, + "args": { + "External id": 991292,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345941485302.574, "dur": 36.274, + "args": { + "External id": 991293,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941485310.330, "dur": 2.128, + "args": { + "External id": 991294,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941485315.995, "dur": 0.621, + "args": { + "External id": 991295,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941485318.750, "dur": 0.590, + "args": { + "External id": 991296,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941485321.303, "dur": 2.647, + "args": { + "External id": 991297,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941485325.503, "dur": 0.675, + "args": { + "External id": 991298,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941485327.901, "dur": 0.556, + "args": { + "External id": 991299,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941485330.005, "dur": 0.325, + "args": { + "External id": 991300,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941485331.330, "dur": 0.472, + "args": { + "External id": 991301,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941485333.829, "dur": 0.494, + "args": { + "External id": 991302,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941485351.090, "dur": 55.954, + "args": { + "External id": 991303,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345941485444.564, "dur": 130.563, + "args": { + "External id": 991304,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 9287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941485457.395, "dur": 3.168, + "args": { + "External id": 991305,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345941485469.809, "dur": 11.771, + "args": { + "External id": 991306,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345941485474.992, "dur": 6.137, + "args": { + "External id": 991307,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 9290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941485479.043, "dur": 0.598, + "args": { + "External id": 991308,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345941485488.570, "dur": 26.784, + "args": { + "External id": 991309,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941485490.864, "dur": 2.727, + "args": { + "External id": 991310,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941485494.820, "dur": 0.507, + "args": { + "External id": 991311,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941485497.260, "dur": 0.466, + "args": { + "External id": 991312,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941485499.476, "dur": 0.574, + "args": { + "External id": 991313,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941485500.968, "dur": 0.552, + "args": { + "External id": 991314,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941485503.475, "dur": 0.593, + "args": { + "External id": 991315,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941485506.098, "dur": 0.491, + "args": { + "External id": 991316,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941485507.649, "dur": 0.473, + "args": { + "External id": 991317,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941485510.089, "dur": 2.400, + "args": { + "External id": 991318,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941485530.341, "dur": 36.482, + "args": { + "External id": 991319,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345941485626.580, "dur": 132.327, + "args": { + "External id": 991320,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 9303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941485655.243, "dur": 99.727, + "args": { + "External id": 991321,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9304, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345941485665.717, "dur": 84.267, + "args": { + "External id": 991322,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 9305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941485776.664, "dur": 2.015, + "args": { + "External id": 991323,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9306, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941485868.215, "dur": 2099.970, + "args": { + "External id": 991324,"Sequence number": 10552501, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9307 + } + }, + { + "ph": "f", "id": 418, "pid": 2338710, "tid": 2379450, "ts": 6345941485868.215, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941485992.749, "dur": 192.697, + "args": { + "External id": 991325,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 9308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345941486242.724, "dur": 47.612, + "args": { + "External id": 991326,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 9309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345941486314.665, "dur": 65.240, + "args": { + "External id": 991327,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 9310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941486392.154, "dur": 37.538, + "args": { + "External id": 991328,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941486436.918, "dur": 36.921, + "args": { + "External id": 991329,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941486481.418, "dur": 32.726, + "args": { + "External id": 991330,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941486523.746, "dur": 32.395, + "args": { + "External id": 991331,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345941486586.059, "dur": 28.195, + "args": { + "External id": 991332,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 9315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345941486636.508, "dur": 34.061, + "args": { + "External id": 991333,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345941486695.150, "dur": 21.945, + "args": { + "External id": 991334,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345941486736.621, "dur": 17.399, + "args": { + "External id": 991335,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941486765.860, "dur": 47.015, + "args": { + "External id": 991336,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941486817.350, "dur": 38.294, + "args": { + "External id": 991337,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345941486888.546, "dur": 396.808, + "args": { + "External id": 991338,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 9321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941486979.901, "dur": 6.420, + "args": { + "External id": 991339,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941486988.795, "dur": 3.079, + "args": { + "External id": 991340,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941486993.420, "dur": 2.310, + "args": { + "External id": 991341,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941486997.367, "dur": 5.222, + "args": { + "External id": 991342,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345941487136.225, "dur": 8.296, + "args": { + "External id": 991343,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941487139.185, "dur": 4.275, + "args": { + "External id": 991344,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345941487146.841, "dur": 44.867, + "args": { + "External id": 991345,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941487157.689, "dur": 2.176, + "args": { + "External id": 991346,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345941487193.918, "dur": 2.226, + "args": { + "External id": 991347,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941487195.254, "dur": 0.776, + "args": { + "External id": 991348,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345941487197.448, "dur": 21.598, + "args": { + "External id": 991349,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941487200.200, "dur": 0.564, + "args": { + "External id": 991350,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345941487332.489, "dur": 38.560, + "args": { + "External id": 991351,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345941487392.362, "dur": 20.698, + "args": { + "External id": 991352,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941487423.396, "dur": 58.969, + "args": { + "External id": 991353,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941487491.240, "dur": 46.166, + "args": { + "External id": 991354,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941487559.509, "dur": 25.900, + "args": { + "External id": 991355,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941487592.989, "dur": 35.853, + "args": { + "External id": 991356,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941487638.114, "dur": 34.059, + "args": { + "External id": 991357,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941487680.356, "dur": 35.686, + "args": { + "External id": 991358,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345941487741.580, "dur": 32.227, + "args": { + "External id": 991359,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 9342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345941487793.971, "dur": 28.822, + "args": { + "External id": 991360,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345941487839.126, "dur": 20.559, + "args": { + "External id": 991361,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345941487878.241, "dur": 16.772, + "args": { + "External id": 991362,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345941487913.365, "dur": 18.052, + "args": { + "External id": 991363,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 9346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941488038.072, "dur": 63.733, + "args": { + "External id": 991364,"Record function id": 0, "Ev Idx": 9347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941488043.645, "dur": 56.231, + "args": { + "External id": 991365,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941488049.040, "dur": 47.151, + "args": { + "External id": 991366,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941488051.666, "dur": 43.885, + "args": { + "External id": 991367,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941488110.645, "dur": 8.447, + "args": { + "External id": 991368,"Record function id": 0, "Ev Idx": 9351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941488112.936, "dur": 5.520, + "args": { + "External id": 991369,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941488114.624, "dur": 3.130, + "args": { + "External id": 991370,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941488115.908, "dur": 1.722, + "args": { + "External id": 991371,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941488123.157, "dur": 5.874, + "args": { + "External id": 991372,"Record function id": 0, "Ev Idx": 9355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941488124.936, "dur": 3.621, + "args": { + "External id": 991373,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941488125.619, "dur": 2.406, + "args": { + "External id": 991374,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941488126.578, "dur": 1.354, + "args": { + "External id": 991375,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941488132.680, "dur": 8.448, + "args": { + "External id": 991376,"Record function id": 0, "Ev Idx": 9359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941488134.120, "dur": 6.368, + "args": { + "External id": 991377,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941488134.964, "dur": 5.027, + "args": { + "External id": 991378,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941488136.124, "dur": 3.761, + "args": { + "External id": 991379,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941488144.867, "dur": 6.523, + "args": { + "External id": 991380,"Record function id": 0, "Ev Idx": 9363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941488146.503, "dur": 4.401, + "args": { + "External id": 991381,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941488147.333, "dur": 2.907, + "args": { + "External id": 991382,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941488148.417, "dur": 1.703, + "args": { + "External id": 991383,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941488155.257, "dur": 6.597, + "args": { + "External id": 991384,"Record function id": 0, "Ev Idx": 9367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941488157.213, "dur": 4.144, + "args": { + "External id": 991385,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941488157.834, "dur": 3.028, + "args": { + "External id": 991386,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941488158.998, "dur": 1.742, + "args": { + "External id": 991387,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941488165.783, "dur": 5.655, + "args": { + "External id": 991388,"Record function id": 0, "Ev Idx": 9371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941488167.313, "dur": 3.594, + "args": { + "External id": 991389,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941488167.865, "dur": 2.567, + "args": { + "External id": 991390,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941488168.956, "dur": 1.353, + "args": { + "External id": 991391,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941488175.022, "dur": 5.492, + "args": { + "External id": 991392,"Record function id": 0, "Ev Idx": 9375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941488176.391, "dur": 3.619, + "args": { + "External id": 991393,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941488176.943, "dur": 2.581, + "args": { + "External id": 991394,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941488178.120, "dur": 1.280, + "args": { + "External id": 991395,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941488184.093, "dur": 6.627, + "args": { + "External id": 991396,"Record function id": 0, "Ev Idx": 9379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941488185.812, "dur": 4.401, + "args": { + "External id": 991397,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941488186.521, "dur": 3.219, + "args": { + "External id": 991398,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941488187.762, "dur": 1.856, + "args": { + "External id": 991399,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941488195.536, "dur": 62198.078, + "args": { + "External id": 991400,"Record function id": 0, "Sequence number": 10552500, "Fwd thread id": 1, "Ev Idx": 9383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941488197.260, "dur": 62185.727, + "args": { + "External id": 991401,"Sequence number": 10552500, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9384 + } + }, + { + "ph": "f", "id": 419, "pid": 2338710, "tid": 2379450, "ts": 6345941488197.260, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.14)", "pid": 2338710, "tid": 2379450, + "ts": 6345941488233.584, "dur": 51.040, + "args": { + "External id": 991402,"Record function id": 0, "Ev Idx": 9385 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.14)", "pid": 2338710, "tid": 2379450, + "ts": 6345941488295.058, "dur": 76.949, + "args": { + "External id": 991403,"Record function id": 0, "Ev Idx": 9386 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.14)", "pid": 2338710, "tid": 2379450, + "ts": 6345941488378.876, "dur": 61994.265, + "args": { + "External id": 991404,"Record function id": 0, "Ev Idx": 9387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941488489.185, "dur": 8.747, + "args": { + "External id": 991405,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941488509.742, "dur": 5.333, + "args": { + "External id": 991406,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345941488532.022, "dur": 60686.949, + "args": { + "External id": 991407,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345941488547.960, "dur": 60654.029, + "args": { + "External id": 991408,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941488666.173, "dur": 22.207, + "args": { + "External id": 991409,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345941488712.386, "dur": 60435.844, + "args": { + "External id": 991410,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 9393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941488716.729, "dur": 60430.427, + "args": { + "External id": 991411,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 9394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941488722.221, "dur": 11.612, + "args": { + "External id": 991412,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941488739.264, "dur": 60400.863, + "args": { + "External id": 991413,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 9396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941549365.498, "dur": 16.160, + "args": { + "External id": 991414,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 9397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941549370.974, "dur": 10.056, + "args": { + "External id": 991415,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345941549425.453, "dur": 416.105, + "args": { + "External id": 991416,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 9399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941549465.814, "dur": 369.071, + "args": { + "External id": 991417,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9400, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345941549482.750, "dur": 344.060, + "args": { + "External id": 991418,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 9401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941549865.378, "dur": 2.319, + "args": { + "External id": 991419,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9402, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941549934.873, "dur": 8.393, + "args": { + "External id": 991420,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941549960.496, "dur": 42.877, + "args": { + "External id": 991421,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941550039.580, "dur": 3.984, + "args": { + "External id": 991422,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941550051.222, "dur": 58.827, + "args": { + "External id": 991423,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941550120.914, "dur": 2.845, + "args": { + "External id": 991424,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941550129.527, "dur": 16.331, + "args": { + "External id": 991425,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941550152.162, "dur": 1.333, + "args": { + "External id": 991426,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941550159.367, "dur": 15.087, + "args": { + "External id": 991427,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941550179.277, "dur": 1.166, + "args": { + "External id": 991428,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941550184.894, "dur": 15.155, + "args": { + "External id": 991429,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941550204.412, "dur": 1.328, + "args": { + "External id": 991430,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941550210.668, "dur": 14.037, + "args": { + "External id": 991431,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941550229.163, "dur": 3.315, + "args": { + "External id": 991432,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941550237.214, "dur": 14.713, + "args": { + "External id": 991433,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941550257.067, "dur": 1.012, + "args": { + "External id": 991434,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941550262.106, "dur": 13.969, + "args": { + "External id": 991435,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941550280.605, "dur": 0.916, + "args": { + "External id": 991436,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941550285.670, "dur": 14.117, + "args": { + "External id": 991437,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 9420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941550414.297, "dur": 3350.109, + "args": { + "External id": 991438,"Record function id": 0, "Ev Idx": 9421 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.13)", "pid": 2338710, "tid": 2379450, + "ts": 6345941550437.922, "dur": 1228.179, + "args": { + "External id": 991439,"Record function id": 0, "Ev Idx": 9422 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.13)", "pid": 2338710, "tid": 2379450, + "ts": 6345941550454.764, "dur": 363.498, + "args": { + "External id": 991440,"Record function id": 0, "Ev Idx": 9423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941550548.401, "dur": 5.273, + "args": { + "External id": 991441,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941550557.292, "dur": 1.088, + "args": { + "External id": 991442,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941550561.310, "dur": 1.125, + "args": { + "External id": 991443,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941550564.721, "dur": 0.974, + "args": { + "External id": 991444,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941550568.308, "dur": 1.014, + "args": { + "External id": 991445,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941550570.863, "dur": 3.339, + "args": { + "External id": 991446,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941550576.039, "dur": 1.066, + "args": { + "External id": 991447,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941550581.006, "dur": 2.204, + "args": { + "External id": 991448,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941550585.096, "dur": 1.005, + "args": { + "External id": 991449,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941550587.649, "dur": 0.888, + "args": { + "External id": 991450,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941550608.987, "dur": 177.415, + "args": { + "External id": 991451,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941550629.276, "dur": 150.983, + "args": { + "External id": 991452,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941550648.342, "dur": 17.695, + "args": { + "External id": 991453,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345941550672.121, "dur": 77.211, + "args": { + "External id": 991454,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 9437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941550675.564, "dur": 73.392, + "args": { + "External id": 991455,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 9438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941550680.649, "dur": 5.845, + "args": { + "External id": 991456,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941550689.104, "dur": 59.057, + "args": { + "External id": 991457,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 9440 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.12", "pid": 2338710, "tid": 2379450, + "ts": 6345941550905.199, "dur": 751.444, + "args": { + "External id": 991458,"Record function id": 0, "Ev Idx": 9441 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.12)", "pid": 2338710, "tid": 2379450, + "ts": 6345941550922.478, "dur": 720.119, + "args": { + "External id": 991459,"Record function id": 0, "Ev Idx": 9442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941550985.113, "dur": 7.509, + "args": { + "External id": 991460,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345941551031.638, "dur": 87.846, + "args": { + "External id": 991461,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941551039.090, "dur": 4.332, + "args": { + "External id": 991462,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941551046.123, "dur": 0.710, + "args": { + "External id": 991463,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941551092.184, "dur": 0.779, + "args": { + "External id": 991464,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941551096.770, "dur": 0.561, + "args": { + "External id": 991465,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941551098.619, "dur": 0.626, + "args": { + "External id": 991466,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941551101.706, "dur": 0.651, + "args": { + "External id": 991467,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941551104.005, "dur": 0.522, + "args": { + "External id": 991468,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941551105.830, "dur": 0.439, + "args": { + "External id": 991469,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941551108.245, "dur": 6.321, + "args": { + "External id": 991470,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941551133.287, "dur": 58.441, + "args": { + "External id": 991471,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345941551235.410, "dur": 133.258, + "args": { + "External id": 991472,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 9455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941551250.384, "dur": 5.113, + "args": { + "External id": 991473,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345941551261.295, "dur": 11.694, + "args": { + "External id": 991474,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345941551266.376, "dur": 6.134, + "args": { + "External id": 991475,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 9458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941551270.320, "dur": 0.730, + "args": { + "External id": 991476,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345941551281.830, "dur": 32.555, + "args": { + "External id": 991477,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941551284.192, "dur": 0.844, + "args": { + "External id": 991478,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941551286.448, "dur": 0.689, + "args": { + "External id": 991479,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941551289.312, "dur": 0.618, + "args": { + "External id": 991480,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941551292.066, "dur": 0.664, + "args": { + "External id": 991481,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941551293.990, "dur": 0.680, + "args": { + "External id": 991482,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941551302.062, "dur": 2.759, + "args": { + "External id": 991483,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941551306.678, "dur": 0.606, + "args": { + "External id": 991484,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941551308.770, "dur": 0.400, + "args": { + "External id": 991485,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941551310.734, "dur": 0.523, + "args": { + "External id": 991486,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941551325.875, "dur": 34.137, + "args": { + "External id": 991487,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345941551422.335, "dur": 140.639, + "args": { + "External id": 991488,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 9471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941551455.225, "dur": 103.638, + "args": { + "External id": 991489,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9472, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345941551467.170, "dur": 87.081, + "args": { + "External id": 991490,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 9473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941551580.988, "dur": 2.191, + "args": { + "External id": 991491,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9474, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941551674.125, "dur": 2068.393, + "args": { + "External id": 991492,"Sequence number": 10552499, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9475 + } + }, + { + "ph": "f", "id": 420, "pid": 2338710, "tid": 2379450, "ts": 6345941551674.125, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941551800.250, "dur": 124.116, + "args": { + "External id": 991493,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 9476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345941551974.566, "dur": 68.459, + "args": { + "External id": 991494,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 9477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345941552111.733, "dur": 69.630, + "args": { + "External id": 991495,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 9478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941552193.651, "dur": 37.011, + "args": { + "External id": 991496,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941552237.871, "dur": 35.435, + "args": { + "External id": 991497,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941552283.579, "dur": 30.614, + "args": { + "External id": 991498,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941552321.429, "dur": 31.888, + "args": { + "External id": 991499,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345941552384.131, "dur": 28.612, + "args": { + "External id": 991500,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 9483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345941552433.092, "dur": 34.021, + "args": { + "External id": 991501,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345941552490.839, "dur": 23.105, + "args": { + "External id": 991502,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345941552533.087, "dur": 18.344, + "args": { + "External id": 991503,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941552561.726, "dur": 45.462, + "args": { + "External id": 991504,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941552611.668, "dur": 39.195, + "args": { + "External id": 991505,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345941552684.751, "dur": 350.607, + "args": { + "External id": 991506,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 9489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941552779.910, "dur": 9.433, + "args": { + "External id": 991507,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941552792.127, "dur": 3.322, + "args": { + "External id": 991508,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941552797.244, "dur": 2.854, + "args": { + "External id": 991509,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941552801.571, "dur": 2.475, + "args": { + "External id": 991510,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345941552876.539, "dur": 6.769, + "args": { + "External id": 991511,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941552878.866, "dur": 3.845, + "args": { + "External id": 991512,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345941552885.501, "dur": 39.459, + "args": { + "External id": 991513,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941552892.129, "dur": 2.266, + "args": { + "External id": 991514,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345941552926.735, "dur": 2.124, + "args": { + "External id": 991515,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941552928.026, "dur": 0.689, + "args": { + "External id": 991516,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345941552929.997, "dur": 22.019, + "args": { + "External id": 991517,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941552932.349, "dur": 0.873, + "args": { + "External id": 991518,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345941553120.042, "dur": 36.504, + "args": { + "External id": 991519,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345941553185.584, "dur": 21.192, + "args": { + "External id": 991520,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941553217.124, "dur": 61.073, + "args": { + "External id": 991521,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941553286.160, "dur": 48.898, + "args": { + "External id": 991522,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941553346.776, "dur": 27.777, + "args": { + "External id": 991523,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941553382.473, "dur": 36.823, + "args": { + "External id": 991524,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941553426.837, "dur": 33.939, + "args": { + "External id": 991525,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941553469.076, "dur": 35.130, + "args": { + "External id": 991526,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345941553525.168, "dur": 29.333, + "args": { + "External id": 991527,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 9510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345941553572.124, "dur": 29.159, + "args": { + "External id": 991528,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345941553617.186, "dur": 21.262, + "args": { + "External id": 991529,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345941553654.549, "dur": 16.690, + "args": { + "External id": 991530,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345941553685.130, "dur": 18.997, + "args": { + "External id": 991531,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 9514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941553790.335, "dur": 19.011, + "args": { + "External id": 991532,"Record function id": 0, "Ev Idx": 9515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941553794.640, "dur": 13.473, + "args": { + "External id": 991533,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941553799.927, "dur": 7.026, + "args": { + "External id": 991534,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941553802.013, "dur": 4.812, + "args": { + "External id": 991535,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941553817.364, "dur": 6.594, + "args": { + "External id": 991536,"Record function id": 0, "Ev Idx": 9519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941553818.922, "dur": 4.376, + "args": { + "External id": 991537,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941553819.703, "dur": 3.015, + "args": { + "External id": 991538,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941553820.979, "dur": 1.586, + "args": { + "External id": 991539,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941553827.844, "dur": 5.823, + "args": { + "External id": 991540,"Record function id": 0, "Ev Idx": 9523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941553829.529, "dur": 3.649, + "args": { + "External id": 991541,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941553830.098, "dur": 2.615, + "args": { + "External id": 991542,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941553831.212, "dur": 1.415, + "args": { + "External id": 991543,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941553837.427, "dur": 5.657, + "args": { + "External id": 991544,"Record function id": 0, "Ev Idx": 9527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941553838.816, "dur": 3.778, + "args": { + "External id": 991545,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941553839.432, "dur": 2.670, + "args": { + "External id": 991546,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941553840.774, "dur": 1.229, + "args": { + "External id": 991547,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941553846.730, "dur": 8.881, + "args": { + "External id": 991548,"Record function id": 0, "Ev Idx": 9531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941553848.577, "dur": 6.577, + "args": { + "External id": 991549,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941553849.137, "dur": 5.439, + "args": { + "External id": 991550,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941553850.025, "dur": 4.414, + "args": { + "External id": 991551,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941553859.421, "dur": 6.221, + "args": { + "External id": 991552,"Record function id": 0, "Ev Idx": 9535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941553860.928, "dur": 4.246, + "args": { + "External id": 991553,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941553861.575, "dur": 3.029, + "args": { + "External id": 991554,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941553862.935, "dur": 1.584, + "args": { + "External id": 991555,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941553872.680, "dur": 5.582, + "args": { + "External id": 991556,"Record function id": 0, "Ev Idx": 9539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941553874.130, "dur": 3.626, + "args": { + "External id": 991557,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941553874.665, "dur": 2.507, + "args": { + "External id": 991558,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941553875.678, "dur": 1.368, + "args": { + "External id": 991559,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941553881.903, "dur": 5.655, + "args": { + "External id": 991560,"Record function id": 0, "Ev Idx": 9543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941553883.564, "dur": 3.520, + "args": { + "External id": 991561,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941553884.109, "dur": 2.470, + "args": { + "External id": 991562,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941553884.944, "dur": 1.546, + "args": { + "External id": 991563,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941553891.782, "dur": 5.155, + "args": { + "External id": 991564,"Record function id": 0, "Ev Idx": 9547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941553893.302, "dur": 3.128, + "args": { + "External id": 991565,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941553893.840, "dur": 2.045, + "args": { + "External id": 991566,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941553894.665, "dur": 1.097, + "args": { + "External id": 991567,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941553901.455, "dur": 60286.605, + "args": { + "External id": 991568,"Record function id": 0, "Sequence number": 10552498, "Fwd thread id": 1, "Ev Idx": 9551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941553903.047, "dur": 60273.389, + "args": { + "External id": 991569,"Sequence number": 10552498, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9552 + } + }, + { + "ph": "f", "id": 421, "pid": 2338710, "tid": 2379450, "ts": 6345941553903.047, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.13)", "pid": 2338710, "tid": 2379450, + "ts": 6345941553940.857, "dur": 48.475, + "args": { + "External id": 991570,"Record function id": 0, "Ev Idx": 9553 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.13)", "pid": 2338710, "tid": 2379450, + "ts": 6345941553998.867, "dur": 136.131, + "args": { + "External id": 991571,"Record function id": 0, "Ev Idx": 9554 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.13)", "pid": 2338710, "tid": 2379450, + "ts": 6345941554144.289, "dur": 60022.557, + "args": { + "External id": 991572,"Record function id": 0, "Ev Idx": 9555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941554252.415, "dur": 8.725, + "args": { + "External id": 991573,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941554274.217, "dur": 6.009, + "args": { + "External id": 991574,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345941554297.462, "dur": 58651.870, + "args": { + "External id": 991575,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345941554313.818, "dur": 58619.016, + "args": { + "External id": 991576,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941554431.826, "dur": 23.025, + "args": { + "External id": 991577,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345941554483.525, "dur": 58386.158, + "args": { + "External id": 991578,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 9561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941554488.629, "dur": 58379.792, + "args": { + "External id": 991579,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 9562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941554493.956, "dur": 10.044, + "args": { + "External id": 991580,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941554511.107, "dur": 58350.938, + "args": { + "External id": 991581,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 9564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941613140.792, "dur": 18.354, + "args": { + "External id": 991582,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 9565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941613146.579, "dur": 11.892, + "args": { + "External id": 991583,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345941613201.486, "dur": 442.949, + "args": { + "External id": 991584,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 9567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941613242.453, "dur": 396.282, + "args": { + "External id": 991585,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9568, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345941613261.281, "dur": 370.364, + "args": { + "External id": 991586,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 9569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941613664.855, "dur": 2.580, + "args": { + "External id": 991587,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9570, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941613733.621, "dur": 10.733, + "args": { + "External id": 991588,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941613759.950, "dur": 44.711, + "args": { + "External id": 991589,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941613817.320, "dur": 3.113, + "args": { + "External id": 991590,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941613826.825, "dur": 16.867, + "args": { + "External id": 991591,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941613850.186, "dur": 1.111, + "args": { + "External id": 991592,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941613857.475, "dur": 16.688, + "args": { + "External id": 991593,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941613880.182, "dur": 0.849, + "args": { + "External id": 991594,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941613887.422, "dur": 14.290, + "args": { + "External id": 991595,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941613906.500, "dur": 1.056, + "args": { + "External id": 991596,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941613911.965, "dur": 14.555, + "args": { + "External id": 991597,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941613930.830, "dur": 1.243, + "args": { + "External id": 991598,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941613936.505, "dur": 12.535, + "args": { + "External id": 991599,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941613953.968, "dur": 1.082, + "args": { + "External id": 991600,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941613959.213, "dur": 12.489, + "args": { + "External id": 991601,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941613978.263, "dur": 1.287, + "args": { + "External id": 991602,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941613985.182, "dur": 13.333, + "args": { + "External id": 991603,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941614003.326, "dur": 3.568, + "args": { + "External id": 991604,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941614035.089, "dur": 54.289, + "args": { + "External id": 991605,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 9588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941614210.236, "dur": 3357.901, + "args": { + "External id": 991606,"Record function id": 0, "Ev Idx": 9589 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.12)", "pid": 2338710, "tid": 2379450, + "ts": 6345941614233.507, "dur": 1218.126, + "args": { + "External id": 991607,"Record function id": 0, "Ev Idx": 9590 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.12)", "pid": 2338710, "tid": 2379450, + "ts": 6345941614254.961, "dur": 376.217, + "args": { + "External id": 991608,"Record function id": 0, "Ev Idx": 9591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941614352.391, "dur": 7.157, + "args": { + "External id": 991609,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941614363.145, "dur": 1.237, + "args": { + "External id": 991610,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941614367.045, "dur": 1.076, + "args": { + "External id": 991611,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941614370.127, "dur": 1.174, + "args": { + "External id": 991612,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941614373.406, "dur": 1.084, + "args": { + "External id": 991613,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941614375.879, "dur": 1.005, + "args": { + "External id": 991614,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941614378.607, "dur": 0.872, + "args": { + "External id": 991615,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941614381.139, "dur": 4.535, + "args": { + "External id": 991616,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941614387.477, "dur": 0.894, + "args": { + "External id": 991617,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941614392.093, "dur": 0.967, + "args": { + "External id": 991618,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941614413.310, "dur": 184.791, + "args": { + "External id": 991619,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941614435.655, "dur": 157.176, + "args": { + "External id": 991620,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941614455.653, "dur": 17.116, + "args": { + "External id": 991621,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345941614478.340, "dur": 81.603, + "args": { + "External id": 991622,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 9605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941614481.823, "dur": 77.721, + "args": { + "External id": 991623,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 9606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941614486.875, "dur": 6.231, + "args": { + "External id": 991624,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941614495.583, "dur": 63.119, + "args": { + "External id": 991625,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 9608 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.11", "pid": 2338710, "tid": 2379450, + "ts": 6345941614719.871, "dur": 722.845, + "args": { + "External id": 991626,"Record function id": 0, "Ev Idx": 9609 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.11)", "pid": 2338710, "tid": 2379450, + "ts": 6345941614737.186, "dur": 690.925, + "args": { + "External id": 991627,"Record function id": 0, "Ev Idx": 9610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941614800.814, "dur": 7.382, + "args": { + "External id": 991628,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345941614826.164, "dur": 33.954, + "args": { + "External id": 991629,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941614832.475, "dur": 1.906, + "args": { + "External id": 991630,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941614836.857, "dur": 0.824, + "args": { + "External id": 991631,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941614839.625, "dur": 2.560, + "args": { + "External id": 991632,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941614844.244, "dur": 0.435, + "args": { + "External id": 991633,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941614845.910, "dur": 0.596, + "args": { + "External id": 991634,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941614848.698, "dur": 0.492, + "args": { + "External id": 991635,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941614851.551, "dur": 0.537, + "args": { + "External id": 991636,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941614853.506, "dur": 0.367, + "args": { + "External id": 991637,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941614855.651, "dur": 0.431, + "args": { + "External id": 991638,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941614871.266, "dur": 46.916, + "args": { + "External id": 991639,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345941614953.857, "dur": 199.476, + "args": { + "External id": 991640,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 9623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941614966.198, "dur": 3.683, + "args": { + "External id": 991641,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345941614975.882, "dur": 14.600, + "args": { + "External id": 991642,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345941614981.372, "dur": 8.604, + "args": { + "External id": 991643,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 9626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941614985.282, "dur": 3.313, + "args": { + "External id": 991644,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345941614999.098, "dur": 49.158, + "args": { + "External id": 991645,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941615001.320, "dur": 0.639, + "args": { + "External id": 991646,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941615003.312, "dur": 0.886, + "args": { + "External id": 991647,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941615006.441, "dur": 0.479, + "args": { + "External id": 991648,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941615028.798, "dur": 0.615, + "args": { + "External id": 991649,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941615032.110, "dur": 0.653, + "args": { + "External id": 991650,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941615035.172, "dur": 0.518, + "args": { + "External id": 991651,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941615037.801, "dur": 0.447, + "args": { + "External id": 991652,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941615039.787, "dur": 2.807, + "args": { + "External id": 991653,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941615044.296, "dur": 0.563, + "args": { + "External id": 991654,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941615103.073, "dur": 40.494, + "args": { + "External id": 991655,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345941615213.626, "dur": 135.434, + "args": { + "External id": 991656,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 9639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941615243.559, "dur": 101.243, + "args": { + "External id": 991657,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9640, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345941615258.682, "dur": 81.422, + "args": { + "External id": 991658,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 9641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941615368.500, "dur": 2.341, + "args": { + "External id": 991659,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9642, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941615459.579, "dur": 2082.528, + "args": { + "External id": 991660,"Sequence number": 10552497, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9643 + } + }, + { + "ph": "f", "id": 422, "pid": 2338710, "tid": 2379450, "ts": 6345941615459.579, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941615596.436, "dur": 127.528, + "args": { + "External id": 991661,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 9644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345941615768.827, "dur": 47.659, + "args": { + "External id": 991662,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 9645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345941615838.138, "dur": 59.038, + "args": { + "External id": 991663,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 9646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941615908.852, "dur": 36.198, + "args": { + "External id": 991664,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941615952.325, "dur": 36.366, + "args": { + "External id": 991665,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941615995.783, "dur": 97.884, + "args": { + "External id": 991666,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941616110.515, "dur": 38.504, + "args": { + "External id": 991667,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345941616182.285, "dur": 31.349, + "args": { + "External id": 991668,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 9651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345941616235.398, "dur": 32.944, + "args": { + "External id": 991669,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345941616292.864, "dur": 23.845, + "args": { + "External id": 991670,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345941616333.180, "dur": 17.507, + "args": { + "External id": 991671,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941616363.511, "dur": 45.277, + "args": { + "External id": 991672,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941616413.530, "dur": 38.116, + "args": { + "External id": 991673,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345941616485.271, "dur": 327.292, + "args": { + "External id": 991674,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 9657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941616592.454, "dur": 10.262, + "args": { + "External id": 991675,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941616605.816, "dur": 2.628, + "args": { + "External id": 991676,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941616609.995, "dur": 4.830, + "args": { + "External id": 991677,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941616616.217, "dur": 2.688, + "args": { + "External id": 991678,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345941616678.101, "dur": 6.008, + "args": { + "External id": 991679,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941616680.178, "dur": 3.635, + "args": { + "External id": 991680,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345941616686.390, "dur": 43.597, + "args": { + "External id": 991681,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941616692.710, "dur": 2.163, + "args": { + "External id": 991682,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345941616731.963, "dur": 2.159, + "args": { + "External id": 991683,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941616733.217, "dur": 0.782, + "args": { + "External id": 991684,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345941616735.590, "dur": 17.047, + "args": { + "External id": 991685,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941616738.997, "dur": 0.700, + "args": { + "External id": 991686,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345941616859.644, "dur": 30.237, + "args": { + "External id": 991687,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345941616911.341, "dur": 17.842, + "args": { + "External id": 991688,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941616939.369, "dur": 48.854, + "args": { + "External id": 991689,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941616996.887, "dur": 110.685, + "args": { + "External id": 991690,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941617123.405, "dur": 31.206, + "args": { + "External id": 991691,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941617164.883, "dur": 41.179, + "args": { + "External id": 991692,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941617214.765, "dur": 36.613, + "args": { + "External id": 991693,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941617259.758, "dur": 38.294, + "args": { + "External id": 991694,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345941617321.017, "dur": 31.819, + "args": { + "External id": 991695,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 9678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345941617370.603, "dur": 26.506, + "args": { + "External id": 991696,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345941617412.503, "dur": 19.544, + "args": { + "External id": 991697,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345941617454.108, "dur": 15.235, + "args": { + "External id": 991698,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345941617484.949, "dur": 17.703, + "args": { + "External id": 991699,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 9682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941617594.278, "dur": 18.601, + "args": { + "External id": 991700,"Record function id": 0, "Ev Idx": 9683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941617598.226, "dur": 13.374, + "args": { + "External id": 991701,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941617603.278, "dur": 7.137, + "args": { + "External id": 991702,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941617605.476, "dur": 4.802, + "args": { + "External id": 991703,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941617617.583, "dur": 7.076, + "args": { + "External id": 991704,"Record function id": 0, "Ev Idx": 9687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941617619.673, "dur": 4.430, + "args": { + "External id": 991705,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941617620.479, "dur": 3.077, + "args": { + "External id": 991706,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941617621.715, "dur": 1.674, + "args": { + "External id": 991707,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941617628.740, "dur": 5.919, + "args": { + "External id": 991708,"Record function id": 0, "Ev Idx": 9691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941617630.479, "dur": 3.673, + "args": { + "External id": 991709,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941617631.129, "dur": 2.537, + "args": { + "External id": 991710,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941617632.233, "dur": 1.293, + "args": { + "External id": 991711,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941617638.640, "dur": 5.910, + "args": { + "External id": 991712,"Record function id": 0, "Ev Idx": 9695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941617640.148, "dur": 3.901, + "args": { + "External id": 991713,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941617640.920, "dur": 2.649, + "args": { + "External id": 991714,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941617642.201, "dur": 1.274, + "args": { + "External id": 991715,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941617648.465, "dur": 6.102, + "args": { + "External id": 991716,"Record function id": 0, "Ev Idx": 9699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941617650.370, "dur": 3.703, + "args": { + "External id": 991717,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941617650.968, "dur": 2.511, + "args": { + "External id": 991718,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941617651.962, "dur": 1.426, + "args": { + "External id": 991719,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941617658.331, "dur": 8.405, + "args": { + "External id": 991720,"Record function id": 0, "Ev Idx": 9703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941617660.133, "dur": 6.044, + "args": { + "External id": 991721,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941617660.795, "dur": 4.812, + "args": { + "External id": 991722,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941617661.776, "dur": 3.730, + "args": { + "External id": 991723,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941617670.780, "dur": 6.083, + "args": { + "External id": 991724,"Record function id": 0, "Ev Idx": 9707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941617672.581, "dur": 3.821, + "args": { + "External id": 991725,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941617673.146, "dur": 2.736, + "args": { + "External id": 991726,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941617674.246, "dur": 1.520, + "args": { + "External id": 991727,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941617680.985, "dur": 6.074, + "args": { + "External id": 991728,"Record function id": 0, "Ev Idx": 9711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941617682.740, "dur": 3.844, + "args": { + "External id": 991729,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941617683.550, "dur": 2.507, + "args": { + "External id": 991730,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941617684.571, "dur": 1.394, + "args": { + "External id": 991731,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941617691.282, "dur": 5.620, + "args": { + "External id": 991732,"Record function id": 0, "Ev Idx": 9715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941617692.724, "dur": 3.710, + "args": { + "External id": 991733,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941617693.293, "dur": 2.596, + "args": { + "External id": 991734,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941617694.430, "dur": 1.364, + "args": { + "External id": 991735,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941617701.674, "dur": 61041.021, + "args": { + "External id": 991736,"Record function id": 0, "Sequence number": 10552496, "Fwd thread id": 1, "Ev Idx": 9719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941617703.158, "dur": 61028.433, + "args": { + "External id": 991737,"Sequence number": 10552496, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9720 + } + }, + { + "ph": "f", "id": 423, "pid": 2338710, "tid": 2379450, "ts": 6345941617703.158, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.12)", "pid": 2338710, "tid": 2379450, + "ts": 6345941617738.886, "dur": 46.489, + "args": { + "External id": 991738,"Record function id": 0, "Ev Idx": 9721 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.12)", "pid": 2338710, "tid": 2379450, + "ts": 6345941617795.991, "dur": 73.139, + "args": { + "External id": 991739,"Record function id": 0, "Ev Idx": 9722 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.12)", "pid": 2338710, "tid": 2379450, + "ts": 6345941617876.814, "dur": 60844.876, + "args": { + "External id": 991740,"Record function id": 0, "Ev Idx": 9723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941617983.925, "dur": 8.120, + "args": { + "External id": 991741,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941618003.417, "dur": 26.412, + "args": { + "External id": 991742,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345941618050.269, "dur": 59528.168, + "args": { + "External id": 991743,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345941618107.431, "dur": 59454.193, + "args": { + "External id": 991744,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941618252.905, "dur": 26.981, + "args": { + "External id": 991745,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345941618316.282, "dur": 59194.085, + "args": { + "External id": 991746,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 9729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941618320.060, "dur": 59189.046, + "args": { + "External id": 991747,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 9730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941618329.428, "dur": 14.605, + "args": { + "External id": 991748,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941618349.356, "dur": 59157.442, + "args": { + "External id": 991749,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 9732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941677707.331, "dur": 15.573, + "args": { + "External id": 991750,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 9733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941677712.413, "dur": 10.018, + "args": { + "External id": 991751,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345941677758.814, "dur": 503.053, + "args": { + "External id": 991752,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 9735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941677799.850, "dur": 454.155, + "args": { + "External id": 991753,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9736, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345941677814.309, "dur": 431.834, + "args": { + "External id": 991754,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 9737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941678292.225, "dur": 2.874, + "args": { + "External id": 991755,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9738, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941678372.519, "dur": 9.130, + "args": { + "External id": 991756,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941678400.042, "dur": 44.150, + "args": { + "External id": 991757,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941678456.765, "dur": 5.090, + "args": { + "External id": 991758,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941678468.059, "dur": 15.980, + "args": { + "External id": 991759,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941678490.412, "dur": 0.962, + "args": { + "External id": 991760,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941678497.381, "dur": 14.487, + "args": { + "External id": 991761,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941678517.647, "dur": 0.806, + "args": { + "External id": 991762,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941678523.489, "dur": 13.719, + "args": { + "External id": 991763,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941678542.190, "dur": 0.973, + "args": { + "External id": 991764,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941678547.292, "dur": 14.084, + "args": { + "External id": 991765,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941678566.319, "dur": 1.178, + "args": { + "External id": 991766,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941678572.067, "dur": 13.861, + "args": { + "External id": 991767,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941678591.140, "dur": 0.910, + "args": { + "External id": 991768,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941678596.850, "dur": 12.670, + "args": { + "External id": 991769,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941678614.404, "dur": 0.856, + "args": { + "External id": 991770,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941678619.405, "dur": 10.086, + "args": { + "External id": 991771,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941678636.532, "dur": 0.825, + "args": { + "External id": 991772,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941678641.415, "dur": 11.245, + "args": { + "External id": 991773,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 9756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941678760.466, "dur": 3442.412, + "args": { + "External id": 991774,"Record function id": 0, "Ev Idx": 9757 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.11)", "pid": 2338710, "tid": 2379450, + "ts": 6345941678782.887, "dur": 1223.222, + "args": { + "External id": 991775,"Record function id": 0, "Ev Idx": 9758 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.11)", "pid": 2338710, "tid": 2379450, + "ts": 6345941678800.777, "dur": 448.915, + "args": { + "External id": 991776,"Record function id": 0, "Ev Idx": 9759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941678891.633, "dur": 7.570, + "args": { + "External id": 991777,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941678903.773, "dur": 0.876, + "args": { + "External id": 991778,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941678906.761, "dur": 0.942, + "args": { + "External id": 991779,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941678910.134, "dur": 1.049, + "args": { + "External id": 991780,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941678912.771, "dur": 1.057, + "args": { + "External id": 991781,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941678915.349, "dur": 0.778, + "args": { + "External id": 991782,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941678917.980, "dur": 0.984, + "args": { + "External id": 991783,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941678922.652, "dur": 2.141, + "args": { + "External id": 991784,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941678926.599, "dur": 2.802, + "args": { + "External id": 991785,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941678931.379, "dur": 0.589, + "args": { + "External id": 991786,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941678957.725, "dur": 249.893, + "args": { + "External id": 991787,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941678978.454, "dur": 222.707, + "args": { + "External id": 991788,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941678995.194, "dur": 41.868, + "args": { + "External id": 991789,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345941679043.805, "dur": 124.003, + "args": { + "External id": 991790,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 9773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941679046.952, "dur": 120.488, + "args": { + "External id": 991791,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 9774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941679090.967, "dur": 9.534, + "args": { + "External id": 991792,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941679103.589, "dur": 63.003, + "args": { + "External id": 991793,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 9776 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.10", "pid": 2338710, "tid": 2379450, + "ts": 6345941679346.553, "dur": 650.219, + "args": { + "External id": 991794,"Record function id": 0, "Ev Idx": 9777 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.10)", "pid": 2338710, "tid": 2379450, + "ts": 6345941679368.128, "dur": 614.604, + "args": { + "External id": 991795,"Record function id": 0, "Ev Idx": 9778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941679438.115, "dur": 8.032, + "args": { + "External id": 991796,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345941679464.125, "dur": 34.197, + "args": { + "External id": 991797,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941679470.408, "dur": 1.826, + "args": { + "External id": 991798,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941679474.712, "dur": 0.644, + "args": { + "External id": 991799,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941679477.324, "dur": 0.330, + "args": { + "External id": 991800,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941679479.291, "dur": 2.666, + "args": { + "External id": 991801,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941679483.972, "dur": 0.484, + "args": { + "External id": 991802,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941679486.063, "dur": 0.508, + "args": { + "External id": 991803,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941679488.418, "dur": 0.642, + "args": { + "External id": 991804,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941679491.275, "dur": 0.504, + "args": { + "External id": 991805,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941679493.474, "dur": 0.378, + "args": { + "External id": 991806,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941679511.223, "dur": 47.912, + "args": { + "External id": 991807,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345941679595.428, "dur": 121.804, + "args": { + "External id": 991808,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 9791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941679607.742, "dur": 3.654, + "args": { + "External id": 991809,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345941679617.652, "dur": 11.384, + "args": { + "External id": 991810,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345941679622.427, "dur": 6.118, + "args": { + "External id": 991811,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 9794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941679626.539, "dur": 0.646, + "args": { + "External id": 991812,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345941679636.532, "dur": 28.763, + "args": { + "External id": 991813,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941679638.583, "dur": 3.025, + "args": { + "External id": 991814,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941679643.523, "dur": 0.626, + "args": { + "External id": 991815,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941679645.975, "dur": 0.553, + "args": { + "External id": 991816,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941679648.482, "dur": 0.465, + "args": { + "External id": 991817,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941679650.972, "dur": 0.642, + "args": { + "External id": 991818,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941679652.699, "dur": 0.637, + "args": { + "External id": 991819,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941679655.378, "dur": 0.817, + "args": { + "External id": 991820,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941679657.956, "dur": 0.323, + "args": { + "External id": 991821,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941679659.667, "dur": 2.639, + "args": { + "External id": 991822,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941679677.109, "dur": 31.983, + "args": { + "External id": 991823,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345941679766.579, "dur": 133.485, + "args": { + "External id": 991824,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 9807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941679794.932, "dur": 101.258, + "args": { + "External id": 991825,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9808, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345941679806.149, "dur": 85.379, + "args": { + "External id": 991826,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 9809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941679921.257, "dur": 2.046, + "args": { + "External id": 991827,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9810, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941680041.084, "dur": 2134.725, + "args": { + "External id": 991828,"Sequence number": 10552495, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9811 + } + }, + { + "ph": "f", "id": 424, "pid": 2338710, "tid": 2379450, "ts": 6345941680041.084, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941680219.317, "dur": 130.017, + "args": { + "External id": 991829,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 9812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345941680399.254, "dur": 48.845, + "args": { + "External id": 991830,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 9813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345941680470.876, "dur": 59.214, + "args": { + "External id": 991831,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 9814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941680541.488, "dur": 36.279, + "args": { + "External id": 991832,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941680585.541, "dur": 36.731, + "args": { + "External id": 991833,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941680630.159, "dur": 32.440, + "args": { + "External id": 991834,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941680672.552, "dur": 32.637, + "args": { + "External id": 991835,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345941680734.473, "dur": 27.213, + "args": { + "External id": 991836,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 9819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345941680783.194, "dur": 34.645, + "args": { + "External id": 991837,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345941680843.493, "dur": 24.062, + "args": { + "External id": 991838,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345941680882.878, "dur": 18.014, + "args": { + "External id": 991839,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941680913.182, "dur": 45.284, + "args": { + "External id": 991840,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941680962.815, "dur": 39.291, + "args": { + "External id": 991841,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345941681095.658, "dur": 338.853, + "args": { + "External id": 991842,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 9825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941681210.550, "dur": 10.186, + "args": { + "External id": 991843,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941681223.762, "dur": 3.465, + "args": { + "External id": 991844,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941681228.946, "dur": 2.841, + "args": { + "External id": 991845,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941681233.030, "dur": 4.673, + "args": { + "External id": 991846,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345941681295.928, "dur": 6.055, + "args": { + "External id": 991847,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941681298.181, "dur": 3.592, + "args": { + "External id": 991848,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345941681303.973, "dur": 39.742, + "args": { + "External id": 991849,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941681310.626, "dur": 2.142, + "args": { + "External id": 991850,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345941681345.565, "dur": 10.071, + "args": { + "External id": 991851,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941681354.711, "dur": 0.794, + "args": { + "External id": 991852,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345941681356.793, "dur": 20.388, + "args": { + "External id": 991853,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941681362.469, "dur": 0.592, + "args": { + "External id": 991854,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345941681480.516, "dur": 33.146, + "args": { + "External id": 991855,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345941681532.915, "dur": 22.500, + "args": { + "External id": 991856,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941681565.422, "dur": 57.492, + "args": { + "External id": 991857,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941681631.122, "dur": 48.379, + "args": { + "External id": 991858,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941681689.773, "dur": 30.568, + "args": { + "External id": 991859,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941681729.989, "dur": 40.803, + "args": { + "External id": 991860,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941681779.542, "dur": 33.486, + "args": { + "External id": 991861,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941681824.425, "dur": 34.921, + "args": { + "External id": 991862,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345941681883.022, "dur": 29.410, + "args": { + "External id": 991863,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 9846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345941681929.415, "dur": 29.393, + "args": { + "External id": 991864,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345941681974.739, "dur": 21.199, + "args": { + "External id": 991865,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345941682033.092, "dur": 21.123, + "args": { + "External id": 991866,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345941682112.554, "dur": 24.092, + "args": { + "External id": 991867,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 9850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941682229.192, "dur": 18.741, + "args": { + "External id": 991868,"Record function id": 0, "Ev Idx": 9851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941682233.528, "dur": 13.094, + "args": { + "External id": 991869,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941682238.746, "dur": 6.607, + "args": { + "External id": 991870,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941682240.442, "dur": 4.767, + "args": { + "External id": 991871,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941682252.792, "dur": 7.103, + "args": { + "External id": 991872,"Record function id": 0, "Ev Idx": 9855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941682254.810, "dur": 4.525, + "args": { + "External id": 991873,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941682255.827, "dur": 2.956, + "args": { + "External id": 991874,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941682257.002, "dur": 1.614, + "args": { + "External id": 991875,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941682264.304, "dur": 5.837, + "args": { + "External id": 991876,"Record function id": 0, "Ev Idx": 9859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941682265.791, "dur": 3.762, + "args": { + "External id": 991877,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941682266.422, "dur": 2.638, + "args": { + "External id": 991878,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941682267.447, "dur": 1.528, + "args": { + "External id": 991879,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941682273.895, "dur": 5.152, + "args": { + "External id": 991880,"Record function id": 0, "Ev Idx": 9863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941682275.225, "dur": 3.368, + "args": { + "External id": 991881,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941682275.861, "dur": 2.250, + "args": { + "External id": 991882,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941682276.666, "dur": 1.307, + "args": { + "External id": 991883,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941682282.858, "dur": 5.434, + "args": { + "External id": 991884,"Record function id": 0, "Ev Idx": 9867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941682284.210, "dur": 3.592, + "args": { + "External id": 991885,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941682284.876, "dur": 2.430, + "args": { + "External id": 991886,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941682285.887, "dur": 1.319, + "args": { + "External id": 991887,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941682291.983, "dur": 8.825, + "args": { + "External id": 991888,"Record function id": 0, "Ev Idx": 9871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941682293.744, "dur": 6.593, + "args": { + "External id": 991889,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941682294.597, "dur": 5.156, + "args": { + "External id": 991890,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941682295.570, "dur": 4.060, + "args": { + "External id": 991891,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941682304.893, "dur": 6.756, + "args": { + "External id": 991892,"Record function id": 0, "Ev Idx": 9875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941682306.484, "dur": 4.703, + "args": { + "External id": 991893,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941682307.408, "dur": 3.165, + "args": { + "External id": 991894,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941682308.610, "dur": 1.874, + "args": { + "External id": 991895,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941682315.432, "dur": 6.403, + "args": { + "External id": 991896,"Record function id": 0, "Ev Idx": 9879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941682317.015, "dur": 4.376, + "args": { + "External id": 991897,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941682318.008, "dur": 2.826, + "args": { + "External id": 991898,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941682319.269, "dur": 1.440, + "args": { + "External id": 991899,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941682325.910, "dur": 5.646, + "args": { + "External id": 991900,"Record function id": 0, "Ev Idx": 9883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941682327.380, "dur": 3.709, + "args": { + "External id": 991901,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941682328.193, "dur": 2.370, + "args": { + "External id": 991902,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941682329.081, "dur": 1.395, + "args": { + "External id": 991903,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941682336.207, "dur": 60962.143, + "args": { + "External id": 991904,"Record function id": 0, "Sequence number": 10552494, "Fwd thread id": 1, "Ev Idx": 9887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941682337.600, "dur": 60949.602, + "args": { + "External id": 991905,"Sequence number": 10552494, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9888 + } + }, + { + "ph": "f", "id": 425, "pid": 2338710, "tid": 2379450, "ts": 6345941682337.600, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.11)", "pid": 2338710, "tid": 2379450, + "ts": 6345941682372.116, "dur": 46.285, + "args": { + "External id": 991906,"Record function id": 0, "Ev Idx": 9889 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.11)", "pid": 2338710, "tid": 2379450, + "ts": 6345941682428.061, "dur": 72.842, + "args": { + "External id": 991907,"Record function id": 0, "Ev Idx": 9890 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.11)", "pid": 2338710, "tid": 2379450, + "ts": 6345941682508.240, "dur": 60769.058, + "args": { + "External id": 991908,"Record function id": 0, "Ev Idx": 9891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941682616.247, "dur": 8.596, + "args": { + "External id": 991909,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941682636.293, "dur": 5.451, + "args": { + "External id": 991910,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345941682657.452, "dur": 59348.090, + "args": { + "External id": 991911,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345941682672.849, "dur": 59316.129, + "args": { + "External id": 991912,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941682801.675, "dur": 21.165, + "args": { + "External id": 991913,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345941682845.845, "dur": 59083.291, + "args": { + "External id": 991914,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 9897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941682849.130, "dur": 59078.514, + "args": { + "External id": 991915,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 9898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941682854.648, "dur": 9.479, + "args": { + "External id": 991916,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941682868.756, "dur": 59051.967, + "args": { + "External id": 991917,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 9900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941742191.469, "dur": 16.775, + "args": { + "External id": 991918,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 9901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941742196.538, "dur": 10.962, + "args": { + "External id": 991919,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345941742251.280, "dur": 488.772, + "args": { + "External id": 991920,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 9903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941742293.787, "dur": 440.275, + "args": { + "External id": 991921,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9904, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345941742309.955, "dur": 417.036, + "args": { + "External id": 991922,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 9905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941742766.668, "dur": 2.694, + "args": { + "External id": 991923,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9906, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941742841.173, "dur": 8.852, + "args": { + "External id": 991924,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941742868.893, "dur": 38.420, + "args": { + "External id": 991925,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941742920.033, "dur": 4.676, + "args": { + "External id": 991926,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941742930.441, "dur": 14.992, + "args": { + "External id": 991927,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941742952.995, "dur": 1.054, + "args": { + "External id": 991928,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941742960.042, "dur": 14.327, + "args": { + "External id": 991929,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941742980.125, "dur": 0.973, + "args": { + "External id": 991930,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941742987.073, "dur": 13.793, + "args": { + "External id": 991931,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941743005.630, "dur": 1.010, + "args": { + "External id": 991932,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941743034.581, "dur": 56.821, + "args": { + "External id": 991933,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941743102.869, "dur": 3.065, + "args": { + "External id": 991934,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941743111.257, "dur": 15.863, + "args": { + "External id": 991935,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941743132.340, "dur": 1.242, + "args": { + "External id": 991936,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941743138.484, "dur": 14.052, + "args": { + "External id": 991937,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941743157.723, "dur": 1.134, + "args": { + "External id": 991938,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941743163.396, "dur": 13.441, + "args": { + "External id": 991939,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941743181.540, "dur": 1.376, + "args": { + "External id": 991940,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941743187.889, "dur": 13.793, + "args": { + "External id": 991941,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 9924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941743320.004, "dur": 3400.429, + "args": { + "External id": 991942,"Record function id": 0, "Ev Idx": 9925 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.10)", "pid": 2338710, "tid": 2379450, + "ts": 6345941743343.388, "dur": 1253.164, + "args": { + "External id": 991943,"Record function id": 0, "Ev Idx": 9926 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.10)", "pid": 2338710, "tid": 2379450, + "ts": 6345941743361.466, "dur": 378.042, + "args": { + "External id": 991944,"Record function id": 0, "Ev Idx": 9927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941743457.421, "dur": 8.159, + "args": { + "External id": 991945,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941743470.617, "dur": 1.060, + "args": { + "External id": 991946,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941743474.139, "dur": 1.145, + "args": { + "External id": 991947,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941743477.447, "dur": 1.217, + "args": { + "External id": 991948,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941743481.036, "dur": 1.168, + "args": { + "External id": 991949,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941743486.135, "dur": 0.934, + "args": { + "External id": 991950,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941743488.638, "dur": 0.764, + "args": { + "External id": 991951,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941743491.191, "dur": 2.152, + "args": { + "External id": 991952,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941743494.762, "dur": 3.243, + "args": { + "External id": 991953,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941743501.738, "dur": 0.835, + "args": { + "External id": 991954,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941743524.172, "dur": 183.545, + "args": { + "External id": 991955,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941743546.918, "dur": 154.393, + "args": { + "External id": 991956,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941743565.965, "dur": 16.938, + "args": { + "External id": 991957,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345941743588.444, "dur": 78.194, + "args": { + "External id": 991958,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 9941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941743591.668, "dur": 74.572, + "args": { + "External id": 991959,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 9942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941743596.124, "dur": 6.487, + "args": { + "External id": 991960,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941743604.864, "dur": 60.449, + "args": { + "External id": 991961,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 9944 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.9", "pid": 2338710, "tid": 2379450, + "ts": 6345941743828.752, "dur": 758.572, + "args": { + "External id": 991962,"Record function id": 0, "Ev Idx": 9945 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.9)", "pid": 2338710, "tid": 2379450, + "ts": 6345941743845.807, "dur": 726.618, + "args": { + "External id": 991963,"Record function id": 0, "Ev Idx": 9946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941743908.082, "dur": 6.979, + "args": { + "External id": 991964,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345941743932.938, "dur": 33.369, + "args": { + "External id": 991965,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941743939.349, "dur": 1.492, + "args": { + "External id": 991966,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941743942.835, "dur": 0.704, + "args": { + "External id": 991967,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941743945.203, "dur": 0.419, + "args": { + "External id": 991968,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941743947.542, "dur": 2.828, + "args": { + "External id": 991969,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941743951.854, "dur": 0.638, + "args": { + "External id": 991970,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941743954.665, "dur": 0.517, + "args": { + "External id": 991971,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941743957.023, "dur": 0.468, + "args": { + "External id": 991972,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941743958.773, "dur": 0.572, + "args": { + "External id": 991973,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941743961.882, "dur": 0.434, + "args": { + "External id": 991974,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941743977.682, "dur": 67.016, + "args": { + "External id": 991975,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345941744127.840, "dur": 150.904, + "args": { + "External id": 991976,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 9959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941744143.518, "dur": 6.052, + "args": { + "External id": 991977,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345941744156.171, "dur": 13.910, + "args": { + "External id": 991978,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345941744161.199, "dur": 8.410, + "args": { + "External id": 991979,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 9962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941744166.268, "dur": 1.125, + "args": { + "External id": 991980,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345941744185.802, "dur": 30.852, + "args": { + "External id": 991981,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941744188.820, "dur": 3.388, + "args": { + "External id": 991982,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941744194.586, "dur": 0.513, + "args": { + "External id": 991983,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941744197.092, "dur": 0.306, + "args": { + "External id": 991984,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941744198.552, "dur": 0.761, + "args": { + "External id": 991985,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941744201.858, "dur": 0.480, + "args": { + "External id": 991986,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941744203.682, "dur": 0.531, + "args": { + "External id": 991987,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941744205.757, "dur": 0.736, + "args": { + "External id": 991988,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941744208.399, "dur": 0.431, + "args": { + "External id": 991989,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941744210.807, "dur": 2.531, + "args": { + "External id": 991990,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941744229.953, "dur": 39.994, + "args": { + "External id": 991991,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345941744336.933, "dur": 151.619, + "args": { + "External id": 991992,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 9975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941744373.500, "dur": 110.947, + "args": { + "External id": 991993,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9976, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345941744384.980, "dur": 94.307, + "args": { + "External id": 991994,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 9977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941744509.054, "dur": 2.429, + "args": { + "External id": 991995,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9978, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941744604.742, "dur": 2089.684, + "args": { + "External id": 991996,"Sequence number": 10552493, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9979 + } + }, + { + "ph": "f", "id": 426, "pid": 2338710, "tid": 2379450, "ts": 6345941744604.742, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941744732.729, "dur": 126.836, + "args": { + "External id": 991997,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 9980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345941744906.986, "dur": 47.913, + "args": { + "External id": 991998,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 9981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345941744976.266, "dur": 124.480, + "args": { + "External id": 991999,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 9982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941745117.712, "dur": 42.936, + "args": { + "External id": 992000,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941745167.986, "dur": 37.451, + "args": { + "External id": 992001,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941745213.123, "dur": 30.744, + "args": { + "External id": 992002,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941745253.845, "dur": 32.358, + "args": { + "External id": 992003,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345941745318.844, "dur": 27.390, + "args": { + "External id": 992004,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 9987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345941745369.754, "dur": 33.941, + "args": { + "External id": 992005,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345941745428.157, "dur": 21.073, + "args": { + "External id": 992006,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345941745465.969, "dur": 15.872, + "args": { + "External id": 992007,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941745493.456, "dur": 42.506, + "args": { + "External id": 992008,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941745540.708, "dur": 37.364, + "args": { + "External id": 992009,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345941745610.503, "dur": 368.278, + "args": { + "External id": 992010,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 9993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941745718.251, "dur": 10.638, + "args": { + "External id": 992011,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941745731.875, "dur": 3.365, + "args": { + "External id": 992012,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941745736.681, "dur": 2.112, + "args": { + "External id": 992013,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941745740.404, "dur": 4.277, + "args": { + "External id": 992014,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345941745801.405, "dur": 6.377, + "args": { + "External id": 992015,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941745804.121, "dur": 3.444, + "args": { + "External id": 992016,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345941745820.115, "dur": 36.768, + "args": { + "External id": 992017,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941745827.099, "dur": 2.099, + "args": { + "External id": 992018,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345941745858.442, "dur": 2.309, + "args": { + "External id": 992019,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941745859.811, "dur": 0.799, + "args": { + "External id": 992020,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345941745861.554, "dur": 55.088, + "args": { + "External id": 992021,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941745863.841, "dur": 32.930, + "args": { + "External id": 992022,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345941746041.778, "dur": 80.702, + "args": { + "External id": 992023,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345941746147.305, "dur": 19.627, + "args": { + "External id": 992024,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941746178.312, "dur": 62.480, + "args": { + "External id": 992025,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941746250.237, "dur": 46.991, + "args": { + "External id": 992026,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941746307.443, "dur": 24.608, + "args": { + "External id": 992027,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941746341.796, "dur": 36.422, + "args": { + "External id": 992028,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941746386.765, "dur": 31.771, + "args": { + "External id": 992029,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941746426.688, "dur": 33.810, + "args": { + "External id": 992030,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345941746482.288, "dur": 27.697, + "args": { + "External id": 992031,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 10014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345941746527.778, "dur": 26.715, + "args": { + "External id": 992032,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345941746570.107, "dur": 20.522, + "args": { + "External id": 992033,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345941746607.383, "dur": 16.721, + "args": { + "External id": 992034,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345941746637.719, "dur": 19.335, + "args": { + "External id": 992035,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 10018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941746746.255, "dur": 18.326, + "args": { + "External id": 992036,"Record function id": 0, "Ev Idx": 10019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941746750.382, "dur": 12.837, + "args": { + "External id": 992037,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941746755.475, "dur": 6.624, + "args": { + "External id": 992038,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941746757.257, "dur": 4.685, + "args": { + "External id": 992039,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941746769.410, "dur": 7.164, + "args": { + "External id": 992040,"Record function id": 0, "Ev Idx": 10023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941746771.142, "dur": 4.825, + "args": { + "External id": 992041,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941746772.204, "dur": 3.100, + "args": { + "External id": 992042,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941746773.616, "dur": 1.532, + "args": { + "External id": 992043,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941746780.593, "dur": 6.246, + "args": { + "External id": 992044,"Record function id": 0, "Ev Idx": 10027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941746782.218, "dur": 4.073, + "args": { + "External id": 992045,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941746782.832, "dur": 2.927, + "args": { + "External id": 992046,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941746784.168, "dur": 1.497, + "args": { + "External id": 992047,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941746790.659, "dur": 6.406, + "args": { + "External id": 992048,"Record function id": 0, "Ev Idx": 10031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941746792.445, "dur": 4.108, + "args": { + "External id": 992049,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941746793.262, "dur": 2.792, + "args": { + "External id": 992050,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941746794.527, "dur": 1.404, + "args": { + "External id": 992051,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941746800.917, "dur": 6.297, + "args": { + "External id": 992052,"Record function id": 0, "Ev Idx": 10035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941746802.501, "dur": 4.213, + "args": { + "External id": 992053,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941746803.287, "dur": 2.897, + "args": { + "External id": 992054,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941746804.559, "dur": 1.504, + "args": { + "External id": 992055,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941746810.906, "dur": 8.173, + "args": { + "External id": 992056,"Record function id": 0, "Ev Idx": 10039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941746812.279, "dur": 6.322, + "args": { + "External id": 992057,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941746812.818, "dur": 5.231, + "args": { + "External id": 992058,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941746813.802, "dur": 4.132, + "args": { + "External id": 992059,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941746822.986, "dur": 6.049, + "args": { + "External id": 992060,"Record function id": 0, "Ev Idx": 10043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941746824.976, "dur": 3.591, + "args": { + "External id": 992061,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941746825.750, "dur": 2.352, + "args": { + "External id": 992062,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941746826.696, "dur": 1.288, + "args": { + "External id": 992063,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941746832.739, "dur": 5.866, + "args": { + "External id": 992064,"Record function id": 0, "Ev Idx": 10047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941746834.387, "dur": 3.728, + "args": { + "External id": 992065,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941746835.048, "dur": 2.582, + "args": { + "External id": 992066,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941746835.974, "dur": 1.533, + "args": { + "External id": 992067,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941746842.355, "dur": 6.193, + "args": { + "External id": 992068,"Record function id": 0, "Ev Idx": 10051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941746844.110, "dur": 3.965, + "args": { + "External id": 992069,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941746844.639, "dur": 2.950, + "args": { + "External id": 992070,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941746846.308, "dur": 1.163, + "args": { + "External id": 992071,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941746853.554, "dur": 66979.970, + "args": { + "External id": 992072,"Record function id": 0, "Sequence number": 10552492, "Fwd thread id": 1, "Ev Idx": 10055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941746854.919, "dur": 66968.039, + "args": { + "External id": 992073,"Sequence number": 10552492, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10056 + } + }, + { + "ph": "f", "id": 427, "pid": 2338710, "tid": 2379450, "ts": 6345941746854.919, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.10)", "pid": 2338710, "tid": 2379450, + "ts": 6345941746890.599, "dur": 46.286, + "args": { + "External id": 992074,"Record function id": 0, "Ev Idx": 10057 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.10)", "pid": 2338710, "tid": 2379450, + "ts": 6345941746946.628, "dur": 93.786, + "args": { + "External id": 992075,"Record function id": 0, "Ev Idx": 10058 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.10)", "pid": 2338710, "tid": 2379450, + "ts": 6345941747049.568, "dur": 66763.503, + "args": { + "External id": 992076,"Record function id": 0, "Ev Idx": 10059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941747201.480, "dur": 8.769, + "args": { + "External id": 992077,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941747224.589, "dur": 6.463, + "args": { + "External id": 992078,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345941747248.722, "dur": 65410.215, + "args": { + "External id": 992079,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345941747265.859, "dur": 65376.462, + "args": { + "External id": 992080,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941747395.797, "dur": 21.438, + "args": { + "External id": 992081,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345941747442.150, "dur": 65137.145, + "args": { + "External id": 992082,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 10065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941747446.519, "dur": 65131.702, + "args": { + "External id": 992083,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 10066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941747451.883, "dur": 9.622, + "args": { + "External id": 992084,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941747466.256, "dur": 65104.792, + "args": { + "External id": 992085,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 10068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941812800.579, "dur": 16.696, + "args": { + "External id": 992086,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 10069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941812805.551, "dur": 11.250, + "args": { + "External id": 992087,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345941812856.307, "dur": 478.962, + "args": { + "External id": 992088,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 10071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941812898.743, "dur": 428.471, + "args": { + "External id": 992089,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10072, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345941812915.346, "dur": 402.091, + "args": { + "External id": 992090,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 10073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941813374.390, "dur": 2.909, + "args": { + "External id": 992091,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10074, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941813451.304, "dur": 11.390, + "args": { + "External id": 992092,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941813480.829, "dur": 45.563, + "args": { + "External id": 992093,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941813539.866, "dur": 3.083, + "args": { + "External id": 992094,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941813548.825, "dur": 16.104, + "args": { + "External id": 992095,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941813571.931, "dur": 1.150, + "args": { + "External id": 992096,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941813578.594, "dur": 14.511, + "args": { + "External id": 992097,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941813598.741, "dur": 1.230, + "args": { + "External id": 992098,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941813605.621, "dur": 14.117, + "args": { + "External id": 992099,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941813624.758, "dur": 1.034, + "args": { + "External id": 992100,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941813629.878, "dur": 13.999, + "args": { + "External id": 992101,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941813648.845, "dur": 1.104, + "args": { + "External id": 992102,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941813654.741, "dur": 12.864, + "args": { + "External id": 992103,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941813672.469, "dur": 0.872, + "args": { + "External id": 992104,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941813677.776, "dur": 14.531, + "args": { + "External id": 992105,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941813696.855, "dur": 0.789, + "args": { + "External id": 992106,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941813701.858, "dur": 13.407, + "args": { + "External id": 992107,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941813721.937, "dur": 3.306, + "args": { + "External id": 992108,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941813731.064, "dur": 13.856, + "args": { + "External id": 992109,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 10092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941813851.915, "dur": 3414.938, + "args": { + "External id": 992110,"Record function id": 0, "Ev Idx": 10093 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.9)", "pid": 2338710, "tid": 2379450, + "ts": 6345941813876.109, "dur": 1288.286, + "args": { + "External id": 992111,"Record function id": 0, "Ev Idx": 10094 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.9)", "pid": 2338710, "tid": 2379450, + "ts": 6345941813895.970, "dur": 444.757, + "args": { + "External id": 992112,"Record function id": 0, "Ev Idx": 10095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941813987.672, "dur": 6.364, + "args": { + "External id": 992113,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941813997.378, "dur": 0.988, + "args": { + "External id": 992114,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941814000.544, "dur": 1.243, + "args": { + "External id": 992115,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941814003.926, "dur": 1.154, + "args": { + "External id": 992116,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941814006.760, "dur": 19.738, + "args": { + "External id": 992117,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941814032.466, "dur": 1.040, + "args": { + "External id": 992118,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941814035.282, "dur": 0.901, + "args": { + "External id": 992119,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941814039.836, "dur": 4.561, + "args": { + "External id": 992120,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941814046.399, "dur": 0.869, + "args": { + "External id": 992121,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941814049.153, "dur": 1.181, + "args": { + "External id": 992122,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941814116.461, "dur": 186.639, + "args": { + "External id": 992123,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941814138.340, "dur": 159.544, + "args": { + "External id": 992124,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941814155.853, "dur": 20.753, + "args": { + "External id": 992125,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345941814184.384, "dur": 82.779, + "args": { + "External id": 992126,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 10109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941814188.019, "dur": 78.787, + "args": { + "External id": 992127,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 10110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941814192.963, "dur": 7.442, + "args": { + "External id": 992128,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941814202.447, "dur": 63.513, + "args": { + "External id": 992129,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 10112 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.8", "pid": 2338710, "tid": 2379450, + "ts": 6345941814432.627, "dur": 722.152, + "args": { + "External id": 992130,"Record function id": 0, "Ev Idx": 10113 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.8)", "pid": 2338710, "tid": 2379450, + "ts": 6345941814451.766, "dur": 688.452, + "args": { + "External id": 992131,"Record function id": 0, "Ev Idx": 10114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941814519.308, "dur": 6.558, + "args": { + "External id": 992132,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345941814543.344, "dur": 35.271, + "args": { + "External id": 992133,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941814549.969, "dur": 1.784, + "args": { + "External id": 992134,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941814553.739, "dur": 0.683, + "args": { + "External id": 992135,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941814556.436, "dur": 2.885, + "args": { + "External id": 992136,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941814561.100, "dur": 0.555, + "args": { + "External id": 992137,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941814563.106, "dur": 0.484, + "args": { + "External id": 992138,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941814565.633, "dur": 0.704, + "args": { + "External id": 992139,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941814568.447, "dur": 0.851, + "args": { + "External id": 992140,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941814570.648, "dur": 0.482, + "args": { + "External id": 992141,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941814573.497, "dur": 0.555, + "args": { + "External id": 992142,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941814590.455, "dur": 49.193, + "args": { + "External id": 992143,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345941814674.743, "dur": 127.307, + "args": { + "External id": 992144,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 10127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941814686.497, "dur": 3.288, + "args": { + "External id": 992145,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345941814695.588, "dur": 14.691, + "args": { + "External id": 992146,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345941814700.468, "dur": 9.306, + "args": { + "External id": 992147,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 10130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941814704.871, "dur": 3.094, + "args": { + "External id": 992148,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345941814717.736, "dur": 27.870, + "args": { + "External id": 992149,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941814720.555, "dur": 0.650, + "args": { + "External id": 992150,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941814722.858, "dur": 0.603, + "args": { + "External id": 992151,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941814725.977, "dur": 0.517, + "args": { + "External id": 992152,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941814728.101, "dur": 0.686, + "args": { + "External id": 992153,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941814730.446, "dur": 0.536, + "args": { + "External id": 992154,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941814732.859, "dur": 0.524, + "args": { + "External id": 992155,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941814735.148, "dur": 0.429, + "args": { + "External id": 992156,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941814736.795, "dur": 2.915, + "args": { + "External id": 992157,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941814742.206, "dur": 0.429, + "args": { + "External id": 992158,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941814758.230, "dur": 35.825, + "args": { + "External id": 992159,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345941814852.621, "dur": 134.048, + "args": { + "External id": 992160,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 10143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941814878.626, "dur": 103.985, + "args": { + "External id": 992161,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10144, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345941814889.590, "dur": 84.832, + "args": { + "External id": 992162,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 10145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941815006.145, "dur": 24.140, + "args": { + "External id": 992163,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10146, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941815173.276, "dur": 2069.066, + "args": { + "External id": 992164,"Sequence number": 10552491, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10147 + } + }, + { + "ph": "f", "id": 428, "pid": 2338710, "tid": 2379450, "ts": 6345941815173.276, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941815300.635, "dur": 132.464, + "args": { + "External id": 992165,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 10148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345941815482.376, "dur": 50.966, + "args": { + "External id": 992166,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 10149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345941815554.957, "dur": 61.290, + "args": { + "External id": 992167,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 10150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941815627.702, "dur": 37.158, + "args": { + "External id": 992168,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941815672.073, "dur": 39.008, + "args": { + "External id": 992169,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941815721.572, "dur": 32.658, + "args": { + "External id": 992170,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941815763.849, "dur": 35.639, + "args": { + "External id": 992171,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345941815829.816, "dur": 27.699, + "args": { + "External id": 992172,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 10155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345941815879.367, "dur": 34.263, + "args": { + "External id": 992173,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345941815937.927, "dur": 24.744, + "args": { + "External id": 992174,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345941815977.900, "dur": 19.606, + "args": { + "External id": 992175,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941816032.626, "dur": 94.380, + "args": { + "External id": 992176,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941816135.156, "dur": 43.029, + "args": { + "External id": 992177,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345941816220.805, "dur": 317.350, + "args": { + "External id": 992178,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 10161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941816333.231, "dur": 9.383, + "args": { + "External id": 992179,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941816345.459, "dur": 6.956, + "args": { + "External id": 992180,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941816353.989, "dur": 5.187, + "args": { + "External id": 992181,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941816360.671, "dur": 2.352, + "args": { + "External id": 992182,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345941816420.527, "dur": 5.864, + "args": { + "External id": 992183,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941816422.548, "dur": 3.512, + "args": { + "External id": 992184,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345941816428.693, "dur": 37.248, + "args": { + "External id": 992185,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941816435.209, "dur": 2.156, + "args": { + "External id": 992186,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345941816467.882, "dur": 2.314, + "args": { + "External id": 992187,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941816469.153, "dur": 0.904, + "args": { + "External id": 992188,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345941816471.067, "dur": 17.167, + "args": { + "External id": 992189,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941816473.442, "dur": 0.669, + "args": { + "External id": 992190,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345941816578.090, "dur": 33.911, + "args": { + "External id": 992191,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345941816634.160, "dur": 19.433, + "args": { + "External id": 992192,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941816663.036, "dur": 53.480, + "args": { + "External id": 992193,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941816724.609, "dur": 45.010, + "args": { + "External id": 992194,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941816779.430, "dur": 25.595, + "args": { + "External id": 992195,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941816814.475, "dur": 35.528, + "args": { + "External id": 992196,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941816858.295, "dur": 33.297, + "args": { + "External id": 992197,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941816899.551, "dur": 34.008, + "args": { + "External id": 992198,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345941816954.503, "dur": 27.045, + "args": { + "External id": 992199,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 10182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345941816999.117, "dur": 50.672, + "args": { + "External id": 992200,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345941817109.180, "dur": 24.797, + "args": { + "External id": 992201,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345941817152.784, "dur": 16.423, + "args": { + "External id": 992202,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345941817183.952, "dur": 19.228, + "args": { + "External id": 992203,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 10186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941817294.839, "dur": 18.442, + "args": { + "External id": 992204,"Record function id": 0, "Ev Idx": 10187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941817299.369, "dur": 12.714, + "args": { + "External id": 992205,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941817304.509, "dur": 6.704, + "args": { + "External id": 992206,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941817306.410, "dur": 4.677, + "args": { + "External id": 992207,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941817318.146, "dur": 7.051, + "args": { + "External id": 992208,"Record function id": 0, "Ev Idx": 10191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941817319.769, "dur": 4.793, + "args": { + "External id": 992209,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941817320.832, "dur": 3.151, + "args": { + "External id": 992210,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941817322.027, "dur": 1.791, + "args": { + "External id": 992211,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941817329.199, "dur": 5.909, + "args": { + "External id": 992212,"Record function id": 0, "Ev Idx": 10195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941817330.895, "dur": 3.720, + "args": { + "External id": 992213,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941817331.517, "dur": 2.572, + "args": { + "External id": 992214,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941817332.598, "dur": 1.405, + "args": { + "External id": 992215,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941817338.827, "dur": 5.685, + "args": { + "External id": 992216,"Record function id": 0, "Ev Idx": 10199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941817340.349, "dur": 3.643, + "args": { + "External id": 992217,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941817340.973, "dur": 2.510, + "args": { + "External id": 992218,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941817342.110, "dur": 1.286, + "args": { + "External id": 992219,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941817348.110, "dur": 5.820, + "args": { + "External id": 992220,"Record function id": 0, "Ev Idx": 10203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941817349.735, "dur": 3.693, + "args": { + "External id": 992221,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941817350.346, "dur": 2.544, + "args": { + "External id": 992222,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941817351.463, "dur": 1.297, + "args": { + "External id": 992223,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941817357.542, "dur": 6.023, + "args": { + "External id": 992224,"Record function id": 0, "Ev Idx": 10207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941817359.499, "dur": 3.557, + "args": { + "External id": 992225,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941817360.277, "dur": 2.280, + "args": { + "External id": 992226,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941817361.270, "dur": 1.198, + "args": { + "External id": 992227,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941817367.516, "dur": 8.347, + "args": { + "External id": 992228,"Record function id": 0, "Ev Idx": 10211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941817369.196, "dur": 6.147, + "args": { + "External id": 992229,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941817369.750, "dur": 5.053, + "args": { + "External id": 992230,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941817370.867, "dur": 3.832, + "args": { + "External id": 992231,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941817379.504, "dur": 5.978, + "args": { + "External id": 992232,"Record function id": 0, "Ev Idx": 10215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941817381.122, "dur": 3.879, + "args": { + "External id": 992233,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941817381.718, "dur": 2.755, + "args": { + "External id": 992234,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941817382.799, "dur": 1.588, + "args": { + "External id": 992235,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941817389.088, "dur": 5.795, + "args": { + "External id": 992236,"Record function id": 0, "Ev Idx": 10219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941817390.742, "dur": 3.640, + "args": { + "External id": 992237,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941817391.573, "dur": 2.321, + "args": { + "External id": 992238,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941817392.394, "dur": 1.413, + "args": { + "External id": 992239,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941817399.444, "dur": 68730.666, + "args": { + "External id": 992240,"Record function id": 0, "Sequence number": 10552490, "Fwd thread id": 1, "Ev Idx": 10223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941817401.536, "dur": 68711.596, + "args": { + "External id": 992241,"Sequence number": 10552490, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10224 + } + }, + { + "ph": "f", "id": 429, "pid": 2338710, "tid": 2379450, "ts": 6345941817401.536, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.9)", "pid": 2338710, "tid": 2379450, + "ts": 6345941817436.678, "dur": 45.075, + "args": { + "External id": 992242,"Record function id": 0, "Ev Idx": 10225 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.9)", "pid": 2338710, "tid": 2379450, + "ts": 6345941817492.343, "dur": 71.075, + "args": { + "External id": 992243,"Record function id": 0, "Ev Idx": 10226 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.9)", "pid": 2338710, "tid": 2379450, + "ts": 6345941817570.857, "dur": 68528.473, + "args": { + "External id": 992244,"Record function id": 0, "Ev Idx": 10227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941817676.111, "dur": 7.111, + "args": { + "External id": 992245,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941817694.408, "dur": 5.522, + "args": { + "External id": 992246,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345941817715.858, "dur": 67086.497, + "args": { + "External id": 992247,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345941817732.014, "dur": 67053.599, + "args": { + "External id": 992248,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941817862.630, "dur": 22.295, + "args": { + "External id": 992249,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345941817912.125, "dur": 66817.095, + "args": { + "External id": 992250,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 10233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941817916.429, "dur": 66811.705, + "args": { + "External id": 992251,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 10234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941817922.436, "dur": 19.720, + "args": { + "External id": 992252,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941817950.335, "dur": 66771.933, + "args": { + "External id": 992253,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 10236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941884944.932, "dur": 16.707, + "args": { + "External id": 992254,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 10237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941884950.196, "dur": 10.934, + "args": { + "External id": 992255,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345941885005.409, "dur": 564.841, + "args": { + "External id": 992256,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 10239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941885123.144, "dur": 439.061, + "args": { + "External id": 992257,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10240, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345941885141.984, "dur": 412.942, + "args": { + "External id": 992258,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 10241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941885598.967, "dur": 3.791, + "args": { + "External id": 992259,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10242, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941885684.862, "dur": 9.426, + "args": { + "External id": 992260,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941885712.789, "dur": 40.063, + "args": { + "External id": 992261,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941885767.518, "dur": 4.498, + "args": { + "External id": 992262,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941885778.798, "dur": 15.683, + "args": { + "External id": 992263,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941885801.081, "dur": 0.960, + "args": { + "External id": 992264,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941885807.463, "dur": 13.941, + "args": { + "External id": 992265,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941885826.970, "dur": 1.151, + "args": { + "External id": 992266,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941885833.224, "dur": 12.240, + "args": { + "External id": 992267,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941885850.531, "dur": 0.931, + "args": { + "External id": 992268,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941885855.899, "dur": 13.462, + "args": { + "External id": 992269,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941885874.770, "dur": 1.215, + "args": { + "External id": 992270,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941885881.971, "dur": 13.300, + "args": { + "External id": 992271,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941885899.802, "dur": 1.308, + "args": { + "External id": 992272,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941885905.026, "dur": 12.704, + "args": { + "External id": 992273,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941885922.752, "dur": 0.940, + "args": { + "External id": 992274,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941885927.834, "dur": 12.104, + "args": { + "External id": 992275,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941885944.763, "dur": 0.981, + "args": { + "External id": 992276,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941885949.473, "dur": 13.038, + "args": { + "External id": 992277,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 10260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941886153.579, "dur": 3393.177, + "args": { + "External id": 992278,"Record function id": 0, "Ev Idx": 10261 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.8)", "pid": 2338710, "tid": 2379450, + "ts": 6345941886180.337, "dur": 1263.467, + "args": { + "External id": 992279,"Record function id": 0, "Ev Idx": 10262 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.8)", "pid": 2338710, "tid": 2379450, + "ts": 6345941886201.668, "dur": 401.869, + "args": { + "External id": 992280,"Record function id": 0, "Ev Idx": 10263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941886304.526, "dur": 9.801, + "args": { + "External id": 992281,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941886319.527, "dur": 1.155, + "args": { + "External id": 992282,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941886322.870, "dur": 1.291, + "args": { + "External id": 992283,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941886326.858, "dur": 1.230, + "args": { + "External id": 992284,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941886329.745, "dur": 0.782, + "args": { + "External id": 992285,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941886332.009, "dur": 0.932, + "args": { + "External id": 992286,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941886335.097, "dur": 0.984, + "args": { + "External id": 992287,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941886339.900, "dur": 2.212, + "args": { + "External id": 992288,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941886343.508, "dur": 3.518, + "args": { + "External id": 992289,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941886348.973, "dur": 0.826, + "args": { + "External id": 992290,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941886370.754, "dur": 194.068, + "args": { + "External id": 992291,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941886391.450, "dur": 167.566, + "args": { + "External id": 992292,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941886409.237, "dur": 19.846, + "args": { + "External id": 992293,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345941886434.307, "dur": 89.977, + "args": { + "External id": 992294,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 10277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941886440.900, "dur": 82.984, + "args": { + "External id": 992295,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 10278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941886447.213, "dur": 8.109, + "args": { + "External id": 992296,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941886457.737, "dur": 65.417, + "args": { + "External id": 992297,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 10280 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.7", "pid": 2338710, "tid": 2379450, + "ts": 6345941886695.731, "dur": 739.164, + "args": { + "External id": 992298,"Record function id": 0, "Ev Idx": 10281 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.7)", "pid": 2338710, "tid": 2379450, + "ts": 6345941886715.307, "dur": 704.118, + "args": { + "External id": 992299,"Record function id": 0, "Ev Idx": 10282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941886781.422, "dur": 7.335, + "args": { + "External id": 992300,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345941886805.522, "dur": 34.450, + "args": { + "External id": 992301,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941886811.755, "dur": 2.149, + "args": { + "External id": 992302,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941886816.539, "dur": 0.748, + "args": { + "External id": 992303,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941886819.308, "dur": 1.027, + "args": { + "External id": 992304,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941886821.644, "dur": 3.401, + "args": { + "External id": 992305,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941886827.185, "dur": 0.630, + "args": { + "External id": 992306,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941886829.679, "dur": 0.491, + "args": { + "External id": 992307,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941886831.314, "dur": 0.651, + "args": { + "External id": 992308,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941886833.572, "dur": 0.429, + "args": { + "External id": 992309,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941886835.860, "dur": 0.484, + "args": { + "External id": 992310,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941886860.049, "dur": 45.725, + "args": { + "External id": 992311,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345941886941.515, "dur": 196.162, + "args": { + "External id": 992312,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 10295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941886952.930, "dur": 4.320, + "args": { + "External id": 992313,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345941886963.116, "dur": 11.300, + "args": { + "External id": 992314,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345941886967.762, "dur": 6.195, + "args": { + "External id": 992315,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 10298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941886972.059, "dur": 0.551, + "args": { + "External id": 992316,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345941886981.777, "dur": 50.297, + "args": { + "External id": 992317,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941886983.910, "dur": 2.806, + "args": { + "External id": 992318,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941886988.659, "dur": 0.545, + "args": { + "External id": 992319,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941886990.856, "dur": 0.628, + "args": { + "External id": 992320,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941886993.709, "dur": 0.420, + "args": { + "External id": 992321,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941886996.042, "dur": 0.467, + "args": { + "External id": 992322,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941886997.755, "dur": 0.426, + "args": { + "External id": 992323,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941887000.204, "dur": 0.615, + "args": { + "External id": 992324,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941887002.402, "dur": 0.368, + "args": { + "External id": 992325,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941887004.023, "dur": 22.510, + "args": { + "External id": 992326,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941887048.010, "dur": 78.573, + "args": { + "External id": 992327,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345941887194.916, "dur": 141.834, + "args": { + "External id": 992328,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 10311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941887228.619, "dur": 103.722, + "args": { + "External id": 992329,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10312, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345941887239.663, "dur": 88.050, + "args": { + "External id": 992330,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 10313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941887357.048, "dur": 1.896, + "args": { + "External id": 992331,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10314, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941887452.482, "dur": 2066.308, + "args": { + "External id": 992332,"Sequence number": 10552489, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10315 + } + }, + { + "ph": "f", "id": 430, "pid": 2338710, "tid": 2379450, "ts": 6345941887452.482, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941887579.013, "dur": 127.476, + "args": { + "External id": 992333,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 10316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345941887756.384, "dur": 46.861, + "args": { + "External id": 992334,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 10317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345941887824.516, "dur": 58.226, + "args": { + "External id": 992335,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 10318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941887894.665, "dur": 36.303, + "args": { + "External id": 992336,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941887938.955, "dur": 36.621, + "args": { + "External id": 992337,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941887983.619, "dur": 52.792, + "args": { + "External id": 992338,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941888050.415, "dur": 81.089, + "args": { + "External id": 992339,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345941888171.196, "dur": 27.281, + "args": { + "External id": 992340,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 10323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345941888222.177, "dur": 38.800, + "args": { + "External id": 992341,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345941888286.591, "dur": 21.892, + "args": { + "External id": 992342,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345941888324.987, "dur": 17.624, + "args": { + "External id": 992343,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941888355.540, "dur": 49.342, + "args": { + "External id": 992344,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941888409.652, "dur": 37.661, + "args": { + "External id": 992345,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345941888485.545, "dur": 312.669, + "args": { + "External id": 992346,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 10329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941888599.139, "dur": 9.635, + "args": { + "External id": 992347,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941888611.745, "dur": 3.549, + "args": { + "External id": 992348,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941888616.784, "dur": 5.987, + "args": { + "External id": 992349,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941888623.899, "dur": 5.260, + "args": { + "External id": 992350,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345941888683.614, "dur": 6.466, + "args": { + "External id": 992351,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941888686.043, "dur": 3.785, + "args": { + "External id": 992352,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345941888692.682, "dur": 36.322, + "args": { + "External id": 992353,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941888698.669, "dur": 2.100, + "args": { + "External id": 992354,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345941888730.702, "dur": 2.104, + "args": { + "External id": 992355,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941888731.812, "dur": 0.887, + "args": { + "External id": 992356,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345941888733.613, "dur": 17.286, + "args": { + "External id": 992357,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941888736.143, "dur": 0.610, + "args": { + "External id": 992358,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345941888835.869, "dur": 30.799, + "args": { + "External id": 992359,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345941888885.975, "dur": 18.745, + "args": { + "External id": 992360,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941888914.475, "dur": 48.989, + "args": { + "External id": 992361,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941888971.502, "dur": 67.813, + "args": { + "External id": 992362,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941889053.963, "dur": 73.379, + "args": { + "External id": 992363,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941889142.001, "dur": 37.473, + "args": { + "External id": 992364,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941889189.049, "dur": 33.240, + "args": { + "External id": 992365,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941889230.546, "dur": 35.738, + "args": { + "External id": 992366,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345941889290.116, "dur": 30.941, + "args": { + "External id": 992367,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 10350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345941889340.216, "dur": 29.329, + "args": { + "External id": 992368,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345941889385.367, "dur": 20.852, + "args": { + "External id": 992369,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345941889421.896, "dur": 18.373, + "args": { + "External id": 992370,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345941889461.182, "dur": 18.608, + "args": { + "External id": 992371,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 10354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941889573.074, "dur": 18.066, + "args": { + "External id": 992372,"Record function id": 0, "Ev Idx": 10355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941889576.950, "dur": 13.245, + "args": { + "External id": 992373,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941889582.030, "dur": 7.020, + "args": { + "External id": 992374,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941889583.991, "dur": 4.917, + "args": { + "External id": 992375,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941889595.938, "dur": 7.234, + "args": { + "External id": 992376,"Record function id": 0, "Ev Idx": 10359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941889597.676, "dur": 4.828, + "args": { + "External id": 992377,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941889598.630, "dur": 3.228, + "args": { + "External id": 992378,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941889600.139, "dur": 1.601, + "args": { + "External id": 992379,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941889607.073, "dur": 5.903, + "args": { + "External id": 992380,"Record function id": 0, "Ev Idx": 10363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941889608.619, "dur": 3.877, + "args": { + "External id": 992381,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941889609.401, "dur": 2.574, + "args": { + "External id": 992382,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941889610.453, "dur": 1.415, + "args": { + "External id": 992383,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941889616.794, "dur": 5.777, + "args": { + "External id": 992384,"Record function id": 0, "Ev Idx": 10367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941889618.417, "dur": 3.687, + "args": { + "External id": 992385,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941889619.027, "dur": 2.599, + "args": { + "External id": 992386,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941889620.261, "dur": 1.273, + "args": { + "External id": 992387,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941889626.217, "dur": 5.873, + "args": { + "External id": 992388,"Record function id": 0, "Ev Idx": 10371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941889627.936, "dur": 3.690, + "args": { + "External id": 992389,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941889628.480, "dur": 2.607, + "args": { + "External id": 992390,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941889629.570, "dur": 1.426, + "args": { + "External id": 992391,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941889635.725, "dur": 8.416, + "args": { + "External id": 992392,"Record function id": 0, "Ev Idx": 10375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941889637.662, "dur": 6.035, + "args": { + "External id": 992393,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941889638.267, "dur": 4.748, + "args": { + "External id": 992394,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941889639.356, "dur": 3.563, + "args": { + "External id": 992395,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941889647.975, "dur": 4.975, + "args": { + "External id": 992396,"Record function id": 0, "Ev Idx": 10379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941889649.203, "dur": 3.317, + "args": { + "External id": 992397,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941889649.947, "dur": 2.103, + "args": { + "External id": 992398,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941889650.782, "dur": 1.160, + "args": { + "External id": 992399,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941889656.550, "dur": 4.776, + "args": { + "External id": 992400,"Record function id": 0, "Ev Idx": 10383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941889657.987, "dur": 2.817, + "args": { + "External id": 992401,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941889658.868, "dur": 1.456, + "args": { + "External id": 992402,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941889659.456, "dur": 0.762, + "args": { + "External id": 992403,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941889665.324, "dur": 4.174, + "args": { + "External id": 992404,"Record function id": 0, "Ev Idx": 10387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941889666.545, "dur": 2.505, + "args": { + "External id": 992405,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941889667.404, "dur": 1.183, + "args": { + "External id": 992406,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941889667.767, "dur": 0.722, + "args": { + "External id": 992407,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941889674.526, "dur": 68159.162, + "args": { + "External id": 992408,"Record function id": 0, "Sequence number": 10552488, "Fwd thread id": 1, "Ev Idx": 10391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941889676.506, "dur": 68145.926, + "args": { + "External id": 992409,"Sequence number": 10552488, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10392 + } + }, + { + "ph": "f", "id": 431, "pid": 2338710, "tid": 2379450, "ts": 6345941889676.506, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.8)", "pid": 2338710, "tid": 2379450, + "ts": 6345941889710.451, "dur": 44.633, + "args": { + "External id": 992410,"Record function id": 0, "Ev Idx": 10393 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.8)", "pid": 2338710, "tid": 2379450, + "ts": 6345941889764.339, "dur": 68.094, + "args": { + "External id": 992411,"Record function id": 0, "Ev Idx": 10394 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.8)", "pid": 2338710, "tid": 2379450, + "ts": 6345941889838.451, "dur": 67973.612, + "args": { + "External id": 992412,"Record function id": 0, "Ev Idx": 10395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941889944.158, "dur": 8.858, + "args": { + "External id": 992413,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941889964.748, "dur": 5.619, + "args": { + "External id": 992414,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345941889986.736, "dur": 66663.959, + "args": { + "External id": 992415,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345941890003.076, "dur": 66631.108, + "args": { + "External id": 992416,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941890200.632, "dur": 33.808, + "args": { + "External id": 992417,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345941890264.034, "dur": 66309.478, + "args": { + "External id": 992418,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 10401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941890267.727, "dur": 66304.594, + "args": { + "External id": 992419,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 10402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941890274.262, "dur": 17.887, + "args": { + "External id": 992420,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941890294.585, "dur": 66270.811, + "args": { + "External id": 992421,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 10404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941956799.801, "dur": 17.319, + "args": { + "External id": 992422,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 10405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941956805.433, "dur": 11.273, + "args": { + "External id": 992423,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345941956861.641, "dur": 474.989, + "args": { + "External id": 992424,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 10407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941956906.078, "dur": 422.678, + "args": { + "External id": 992425,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10408, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345941956921.678, "dur": 398.839, + "args": { + "External id": 992426,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 10409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941957365.664, "dur": 2.612, + "args": { + "External id": 992427,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10410, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941957447.230, "dur": 9.202, + "args": { + "External id": 992428,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941957472.838, "dur": 44.037, + "args": { + "External id": 992429,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941957529.845, "dur": 3.143, + "args": { + "External id": 992430,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941957539.607, "dur": 15.897, + "args": { + "External id": 992431,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941957562.191, "dur": 3.611, + "args": { + "External id": 992432,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941957571.758, "dur": 15.009, + "args": { + "External id": 992433,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941957592.281, "dur": 1.088, + "args": { + "External id": 992434,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941957599.598, "dur": 14.288, + "args": { + "External id": 992435,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941957618.828, "dur": 1.093, + "args": { + "External id": 992436,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941957624.338, "dur": 14.260, + "args": { + "External id": 992437,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941957643.161, "dur": 1.064, + "args": { + "External id": 992438,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941957648.431, "dur": 13.848, + "args": { + "External id": 992439,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941957667.298, "dur": 1.040, + "args": { + "External id": 992440,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941957673.100, "dur": 14.299, + "args": { + "External id": 992441,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941957693.980, "dur": 1.137, + "args": { + "External id": 992442,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941957700.807, "dur": 13.973, + "args": { + "External id": 992443,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941957719.313, "dur": 1.097, + "args": { + "External id": 992444,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345941957725.278, "dur": 14.714, + "args": { + "External id": 992445,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 10428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941957853.011, "dur": 3501.921, + "args": { + "External id": 992446,"Record function id": 0, "Ev Idx": 10429 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.7)", "pid": 2338710, "tid": 2379450, + "ts": 6345941957878.487, "dur": 1318.074, + "args": { + "External id": 992447,"Record function id": 0, "Ev Idx": 10430 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.7)", "pid": 2338710, "tid": 2379450, + "ts": 6345941957900.278, "dur": 472.140, + "args": { + "External id": 992448,"Record function id": 0, "Ev Idx": 10431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941958005.119, "dur": 27.173, + "args": { + "External id": 992449,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941958038.867, "dur": 3.101, + "args": { + "External id": 992450,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941958044.514, "dur": 1.051, + "args": { + "External id": 992451,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941958047.538, "dur": 0.834, + "args": { + "External id": 992452,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941958050.127, "dur": 1.059, + "args": { + "External id": 992453,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941958094.605, "dur": 2.813, + "args": { + "External id": 992454,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941958100.527, "dur": 1.066, + "args": { + "External id": 992455,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941958103.059, "dur": 2.704, + "args": { + "External id": 992456,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941958107.585, "dur": 0.728, + "args": { + "External id": 992457,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941958111.663, "dur": 2.828, + "args": { + "External id": 992458,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941958137.169, "dur": 196.298, + "args": { + "External id": 992459,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941958158.769, "dur": 169.171, + "args": { + "External id": 992460,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941958179.454, "dur": 17.755, + "args": { + "External id": 992461,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345941958203.080, "dur": 90.716, + "args": { + "External id": 992462,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 10445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941958206.006, "dur": 87.405, + "args": { + "External id": 992463,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 10446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941958210.676, "dur": 11.211, + "args": { + "External id": 992464,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941958224.303, "dur": 67.837, + "args": { + "External id": 992465,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 10448 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.6", "pid": 2338710, "tid": 2379450, + "ts": 6345941958469.383, "dur": 718.853, + "args": { + "External id": 992466,"Record function id": 0, "Ev Idx": 10449 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.6)", "pid": 2338710, "tid": 2379450, + "ts": 6345941958489.067, "dur": 683.232, + "args": { + "External id": 992467,"Record function id": 0, "Ev Idx": 10450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941958559.267, "dur": 5.776, + "args": { + "External id": 992468,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345941958581.977, "dur": 31.853, + "args": { + "External id": 992469,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941958588.084, "dur": 1.673, + "args": { + "External id": 992470,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941958591.667, "dur": 0.729, + "args": { + "External id": 992471,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941958594.476, "dur": 0.728, + "args": { + "External id": 992472,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941958596.801, "dur": 0.593, + "args": { + "External id": 992473,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941958598.712, "dur": 2.834, + "args": { + "External id": 992474,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941958603.743, "dur": 0.472, + "args": { + "External id": 992475,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941958606.021, "dur": 0.347, + "args": { + "External id": 992476,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941958607.532, "dur": 0.508, + "args": { + "External id": 992477,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941958609.513, "dur": 0.435, + "args": { + "External id": 992478,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941958624.495, "dur": 48.120, + "args": { + "External id": 992479,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345941958707.422, "dur": 122.065, + "args": { + "External id": 992480,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 10463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941958719.519, "dur": 2.976, + "args": { + "External id": 992481,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345941958728.370, "dur": 12.026, + "args": { + "External id": 992482,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345941958733.437, "dur": 6.488, + "args": { + "External id": 992483,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 10466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941958737.760, "dur": 0.778, + "args": { + "External id": 992484,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345941958747.915, "dur": 25.613, + "args": { + "External id": 992485,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941958750.199, "dur": 0.382, + "args": { + "External id": 992486,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941958752.161, "dur": 2.900, + "args": { + "External id": 992487,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941958756.918, "dur": 0.453, + "args": { + "External id": 992488,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941958759.386, "dur": 0.413, + "args": { + "External id": 992489,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941958761.055, "dur": 0.900, + "args": { + "External id": 992490,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941958764.090, "dur": 0.496, + "args": { + "External id": 992491,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941958766.245, "dur": 0.298, + "args": { + "External id": 992492,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941958767.988, "dur": 0.535, + "args": { + "External id": 992493,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941958770.228, "dur": 0.440, + "args": { + "External id": 992494,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941958784.817, "dur": 34.271, + "args": { + "External id": 992495,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345941958881.279, "dur": 155.871, + "args": { + "External id": 992496,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 10479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941958908.435, "dur": 122.512, + "args": { + "External id": 992497,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10480, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345941958919.546, "dur": 85.442, + "args": { + "External id": 992498,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 10481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345941959100.742, "dur": 3.974, + "args": { + "External id": 992499,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10482, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941959205.765, "dur": 2117.744, + "args": { + "External id": 992500,"Sequence number": 10552487, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10483 + } + }, + { + "ph": "f", "id": 432, "pid": 2338710, "tid": 2379450, "ts": 6345941959205.765, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941959335.646, "dur": 130.917, + "args": { + "External id": 992501,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 10484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345941959516.051, "dur": 52.016, + "args": { + "External id": 992502,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 10485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345941959589.437, "dur": 63.894, + "args": { + "External id": 992503,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 10486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941959664.747, "dur": 38.486, + "args": { + "External id": 992504,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941959710.345, "dur": 37.205, + "args": { + "External id": 992505,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941959754.922, "dur": 32.018, + "args": { + "External id": 992506,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941959796.472, "dur": 32.640, + "args": { + "External id": 992507,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345941959861.759, "dur": 29.051, + "args": { + "External id": 992508,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 10491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345941959915.485, "dur": 34.598, + "args": { + "External id": 992509,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345941959976.064, "dur": 25.279, + "args": { + "External id": 992510,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345941960040.817, "dur": 61.367, + "args": { + "External id": 992511,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941960123.001, "dur": 54.463, + "args": { + "External id": 992512,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941960182.209, "dur": 55.390, + "args": { + "External id": 992513,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345941960280.476, "dur": 301.552, + "args": { + "External id": 992514,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 10497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941960382.866, "dur": 7.306, + "args": { + "External id": 992515,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941960392.346, "dur": 2.859, + "args": { + "External id": 992516,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941960396.800, "dur": 6.888, + "args": { + "External id": 992517,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941960405.106, "dur": 4.706, + "args": { + "External id": 992518,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345941960462.580, "dur": 6.428, + "args": { + "External id": 992519,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941960465.087, "dur": 3.660, + "args": { + "External id": 992520,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345941960471.421, "dur": 37.699, + "args": { + "External id": 992521,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941960477.640, "dur": 1.859, + "args": { + "External id": 992522,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345941960510.921, "dur": 1.913, + "args": { + "External id": 992523,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941960512.124, "dur": 0.629, + "args": { + "External id": 992524,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345941960513.811, "dur": 19.262, + "args": { + "External id": 992525,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941960516.453, "dur": 0.788, + "args": { + "External id": 992526,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345941960621.849, "dur": 46.301, + "args": { + "External id": 992527,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345941960686.859, "dur": 20.815, + "args": { + "External id": 992528,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941960717.419, "dur": 54.795, + "args": { + "External id": 992529,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941960780.758, "dur": 47.033, + "args": { + "External id": 992530,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941960837.834, "dur": 26.705, + "args": { + "External id": 992531,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941960874.554, "dur": 37.329, + "args": { + "External id": 992532,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941960921.088, "dur": 34.149, + "args": { + "External id": 992533,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345941960963.628, "dur": 35.393, + "args": { + "External id": 992534,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345941961043.251, "dur": 75.003, + "args": { + "External id": 992535,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 10518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345941961140.433, "dur": 31.129, + "args": { + "External id": 992536,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345941961189.275, "dur": 22.099, + "args": { + "External id": 992537,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345941961229.234, "dur": 17.975, + "args": { + "External id": 992538,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345941961261.919, "dur": 22.964, + "args": { + "External id": 992539,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 10522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941961382.720, "dur": 18.662, + "args": { + "External id": 992540,"Record function id": 0, "Ev Idx": 10523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941961386.949, "dur": 13.053, + "args": { + "External id": 992541,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941961392.216, "dur": 6.624, + "args": { + "External id": 992542,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941961394.034, "dur": 4.670, + "args": { + "External id": 992543,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941961406.147, "dur": 7.273, + "args": { + "External id": 992544,"Record function id": 0, "Ev Idx": 10527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941961407.954, "dur": 4.782, + "args": { + "External id": 992545,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941961408.600, "dur": 3.464, + "args": { + "External id": 992546,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941961409.973, "dur": 1.902, + "args": { + "External id": 992547,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941961417.411, "dur": 5.373, + "args": { + "External id": 992548,"Record function id": 0, "Ev Idx": 10531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941961418.913, "dur": 3.379, + "args": { + "External id": 992549,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941961419.519, "dur": 2.265, + "args": { + "External id": 992550,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941961420.366, "dur": 1.328, + "args": { + "External id": 992551,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941961426.664, "dur": 5.409, + "args": { + "External id": 992552,"Record function id": 0, "Ev Idx": 10535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941961428.147, "dur": 3.450, + "args": { + "External id": 992553,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941961428.769, "dur": 2.307, + "args": { + "External id": 992554,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941961429.707, "dur": 1.244, + "args": { + "External id": 992555,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941961435.680, "dur": 5.903, + "args": { + "External id": 992556,"Record function id": 0, "Ev Idx": 10539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941961437.490, "dur": 3.617, + "args": { + "External id": 992557,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941961438.032, "dur": 2.412, + "args": { + "External id": 992558,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941961438.943, "dur": 1.413, + "args": { + "External id": 992559,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941961445.197, "dur": 8.331, + "args": { + "External id": 992560,"Record function id": 0, "Ev Idx": 10543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941961447.080, "dur": 5.964, + "args": { + "External id": 992561,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941961447.762, "dur": 4.784, + "args": { + "External id": 992562,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941961448.613, "dur": 3.823, + "args": { + "External id": 992563,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941961457.333, "dur": 6.907, + "args": { + "External id": 992564,"Record function id": 0, "Ev Idx": 10547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941961458.986, "dur": 4.771, + "args": { + "External id": 992565,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941961459.944, "dur": 3.321, + "args": { + "External id": 992566,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941961461.602, "dur": 1.577, + "args": { + "External id": 992567,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941961468.060, "dur": 6.122, + "args": { + "External id": 992568,"Record function id": 0, "Ev Idx": 10551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941961469.790, "dur": 3.940, + "args": { + "External id": 992569,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941961470.404, "dur": 2.603, + "args": { + "External id": 992570,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941961471.455, "dur": 1.459, + "args": { + "External id": 992571,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941961478.516, "dur": 6.273, + "args": { + "External id": 992572,"Record function id": 0, "Ev Idx": 10555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345941961480.315, "dur": 4.033, + "args": { + "External id": 992573,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941961480.819, "dur": 3.023, + "args": { + "External id": 992574,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345941961481.941, "dur": 1.783, + "args": { + "External id": 992575,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941961489.431, "dur": 69455.460, + "args": { + "External id": 992576,"Record function id": 0, "Sequence number": 10552486, "Fwd thread id": 1, "Ev Idx": 10559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345941961490.963, "dur": 69442.928, + "args": { + "External id": 992577,"Sequence number": 10552486, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10560 + } + }, + { + "ph": "f", "id": 433, "pid": 2338710, "tid": 2379450, "ts": 6345941961490.963, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.7)", "pid": 2338710, "tid": 2379450, + "ts": 6345941961526.266, "dur": 46.923, + "args": { + "External id": 992578,"Record function id": 0, "Ev Idx": 10561 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.7)", "pid": 2338710, "tid": 2379450, + "ts": 6345941961582.558, "dur": 71.774, + "args": { + "External id": 992579,"Record function id": 0, "Ev Idx": 10562 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.7)", "pid": 2338710, "tid": 2379450, + "ts": 6345941961662.202, "dur": 69261.766, + "args": { + "External id": 992580,"Record function id": 0, "Ev Idx": 10563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941961775.549, "dur": 8.177, + "args": { + "External id": 992581,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345941961795.247, "dur": 5.667, + "args": { + "External id": 992582,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345941961816.677, "dur": 68013.565, + "args": { + "External id": 992583,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345941961833.312, "dur": 67980.220, + "args": { + "External id": 992584,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345941961982.400, "dur": 23.122, + "args": { + "External id": 992585,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345941962096.518, "dur": 67659.513, + "args": { + "External id": 992586,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 10569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345941962101.769, "dur": 67653.258, + "args": { + "External id": 992587,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 10570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345941962107.973, "dur": 14.918, + "args": { + "External id": 992588,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345941962126.108, "dur": 67623.590, + "args": { + "External id": 992589,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 10572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942029975.608, "dur": 15.444, + "args": { + "External id": 992590,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 10573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942029980.665, "dur": 9.875, + "args": { + "External id": 992591,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345942030049.846, "dur": 404.340, + "args": { + "External id": 992592,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 10575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345942030127.386, "dur": 321.368, + "args": { + "External id": 992593,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10576, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345942030144.919, "dur": 296.686, + "args": { + "External id": 992594,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 10577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345942030479.876, "dur": 2.429, + "args": { + "External id": 992595,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10578, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942030557.366, "dur": 8.944, + "args": { + "External id": 992596,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942030583.761, "dur": 44.988, + "args": { + "External id": 992597,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942030642.470, "dur": 5.218, + "args": { + "External id": 992598,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942030653.842, "dur": 16.136, + "args": { + "External id": 992599,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942030676.659, "dur": 1.278, + "args": { + "External id": 992600,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942030683.721, "dur": 15.576, + "args": { + "External id": 992601,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942030705.038, "dur": 0.930, + "args": { + "External id": 992602,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942030711.057, "dur": 14.129, + "args": { + "External id": 992603,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942030730.908, "dur": 0.902, + "args": { + "External id": 992604,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942030737.156, "dur": 12.855, + "args": { + "External id": 992605,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942030754.901, "dur": 1.021, + "args": { + "External id": 992606,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942030760.235, "dur": 12.176, + "args": { + "External id": 992607,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942030777.790, "dur": 1.049, + "args": { + "External id": 992608,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942030784.141, "dur": 14.707, + "args": { + "External id": 992609,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942030803.801, "dur": 1.039, + "args": { + "External id": 992610,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942030809.168, "dur": 13.871, + "args": { + "External id": 992611,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942030830.065, "dur": 0.882, + "args": { + "External id": 992612,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942030836.032, "dur": 14.417, + "args": { + "External id": 992613,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 10596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345942030963.809, "dur": 3482.285, + "args": { + "External id": 992614,"Record function id": 0, "Ev Idx": 10597 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.6)", "pid": 2338710, "tid": 2379450, + "ts": 6345942030987.509, "dur": 1352.285, + "args": { + "External id": 992615,"Record function id": 0, "Ev Idx": 10598 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.6)", "pid": 2338710, "tid": 2379450, + "ts": 6345942031028.279, "dur": 447.681, + "args": { + "External id": 992616,"Record function id": 0, "Ev Idx": 10599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942031177.865, "dur": 9.621, + "args": { + "External id": 992617,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942031193.144, "dur": 0.892, + "args": { + "External id": 992618,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942031196.274, "dur": 0.831, + "args": { + "External id": 992619,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942031199.436, "dur": 1.083, + "args": { + "External id": 992620,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942031201.916, "dur": 0.947, + "args": { + "External id": 992621,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942031204.354, "dur": 0.882, + "args": { + "External id": 992622,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942031207.219, "dur": 0.939, + "args": { + "External id": 992623,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942031210.026, "dur": 2.077, + "args": { + "External id": 992624,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942031216.212, "dur": 3.084, + "args": { + "External id": 992625,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942031224.529, "dur": 0.723, + "args": { + "External id": 992626,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345942031248.035, "dur": 188.422, + "args": { + "External id": 992627,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345942031269.749, "dur": 161.221, + "args": { + "External id": 992628,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942031290.991, "dur": 18.389, + "args": { + "External id": 992629,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345942031314.948, "dur": 84.921, + "args": { + "External id": 992630,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 10613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345942031318.473, "dur": 80.963, + "args": { + "External id": 992631,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 10614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942031324.710, "dur": 7.949, + "args": { + "External id": 992632,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345942031335.069, "dur": 63.384, + "args": { + "External id": 992633,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 10616 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.5", "pid": 2338710, "tid": 2379450, + "ts": 6345942031570.037, "dur": 759.976, + "args": { + "External id": 992634,"Record function id": 0, "Ev Idx": 10617 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.5)", "pid": 2338710, "tid": 2379450, + "ts": 6345942031589.574, "dur": 724.072, + "args": { + "External id": 992635,"Record function id": 0, "Ev Idx": 10618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942031661.530, "dur": 6.878, + "args": { + "External id": 992636,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345942031685.406, "dur": 31.926, + "args": { + "External id": 992637,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942031690.882, "dur": 2.066, + "args": { + "External id": 992638,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942031695.732, "dur": 0.550, + "args": { + "External id": 992639,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942031698.618, "dur": 0.319, + "args": { + "External id": 992640,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942031700.252, "dur": 3.112, + "args": { + "External id": 992641,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942031704.828, "dur": 0.530, + "args": { + "External id": 992642,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942031707.199, "dur": 0.289, + "args": { + "External id": 992643,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942031708.684, "dur": 0.507, + "args": { + "External id": 992644,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942031711.089, "dur": 0.470, + "args": { + "External id": 992645,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942031713.002, "dur": 0.717, + "args": { + "External id": 992646,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345942031729.177, "dur": 52.869, + "args": { + "External id": 992647,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345942031818.023, "dur": 128.624, + "args": { + "External id": 992648,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 10631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942031830.162, "dur": 3.808, + "args": { + "External id": 992649,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345942031839.797, "dur": 12.283, + "args": { + "External id": 992650,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345942031844.546, "dur": 7.044, + "args": { + "External id": 992651,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 10634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942031849.477, "dur": 0.570, + "args": { + "External id": 992652,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345942031859.020, "dur": 29.282, + "args": { + "External id": 992653,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942031861.305, "dur": 2.774, + "args": { + "External id": 992654,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942031866.133, "dur": 0.425, + "args": { + "External id": 992655,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942031867.915, "dur": 0.657, + "args": { + "External id": 992656,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942031870.327, "dur": 0.452, + "args": { + "External id": 992657,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942031872.380, "dur": 0.416, + "args": { + "External id": 992658,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942031874.281, "dur": 0.464, + "args": { + "External id": 992659,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942031876.373, "dur": 0.709, + "args": { + "External id": 992660,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942031878.512, "dur": 0.825, + "args": { + "External id": 992661,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942031880.733, "dur": 2.471, + "args": { + "External id": 992662,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345942031900.936, "dur": 37.711, + "args": { + "External id": 992663,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345942032000.561, "dur": 219.602, + "args": { + "External id": 992664,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 10647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345942032103.650, "dur": 111.652, + "args": { + "External id": 992665,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10648, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345942032117.188, "dur": 93.260, + "args": { + "External id": 992666,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 10649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345942032241.462, "dur": 1.846, + "args": { + "External id": 992667,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10650, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345942032349.253, "dur": 2071.432, + "args": { + "External id": 992668,"Sequence number": 10552485, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10651 + } + }, + { + "ph": "f", "id": 434, "pid": 2338710, "tid": 2379450, "ts": 6345942032349.253, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942032478.886, "dur": 128.025, + "args": { + "External id": 992669,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 10652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345942032655.861, "dur": 49.244, + "args": { + "External id": 992670,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 10653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345942032727.111, "dur": 58.790, + "args": { + "External id": 992671,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 10654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942032798.338, "dur": 36.235, + "args": { + "External id": 992672,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942032842.292, "dur": 39.904, + "args": { + "External id": 992673,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942032888.751, "dur": 34.641, + "args": { + "External id": 992674,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942032933.682, "dur": 32.475, + "args": { + "External id": 992675,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345942032995.510, "dur": 51.278, + "args": { + "External id": 992676,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 10659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345942033122.432, "dur": 39.084, + "args": { + "External id": 992677,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345942033190.463, "dur": 22.564, + "args": { + "External id": 992678,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345942033230.274, "dur": 17.601, + "args": { + "External id": 992679,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942033261.683, "dur": 49.972, + "args": { + "External id": 992680,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942033316.567, "dur": 37.683, + "args": { + "External id": 992681,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345942033388.189, "dur": 309.576, + "args": { + "External id": 992682,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 10665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942033498.671, "dur": 9.903, + "args": { + "External id": 992683,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942033511.544, "dur": 3.379, + "args": { + "External id": 992684,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942033520.663, "dur": 2.405, + "args": { + "External id": 992685,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942033524.555, "dur": 4.714, + "args": { + "External id": 992686,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345942033583.503, "dur": 5.830, + "args": { + "External id": 992687,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942033585.486, "dur": 3.631, + "args": { + "External id": 992688,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345942033591.653, "dur": 35.830, + "args": { + "External id": 992689,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942033597.731, "dur": 1.952, + "args": { + "External id": 992690,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345942033629.177, "dur": 2.285, + "args": { + "External id": 992691,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942033630.664, "dur": 0.693, + "args": { + "External id": 992692,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345942033632.848, "dur": 16.315, + "args": { + "External id": 992693,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942033635.265, "dur": 0.734, + "args": { + "External id": 992694,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345942033736.278, "dur": 30.737, + "args": { + "External id": 992695,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345942033785.437, "dur": 18.787, + "args": { + "External id": 992696,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942033813.217, "dur": 48.256, + "args": { + "External id": 992697,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942033869.997, "dur": 45.418, + "args": { + "External id": 992698,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942033924.782, "dur": 25.087, + "args": { + "External id": 992699,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942033959.454, "dur": 35.141, + "args": { + "External id": 992700,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942034003.203, "dur": 100.113, + "args": { + "External id": 992701,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942034122.256, "dur": 53.574, + "args": { + "External id": 992702,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345942034205.669, "dur": 28.486, + "args": { + "External id": 992703,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 10686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345942034252.813, "dur": 27.775, + "args": { + "External id": 992704,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345942034297.210, "dur": 19.684, + "args": { + "External id": 992705,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345942034333.804, "dur": 15.828, + "args": { + "External id": 992706,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345942034364.070, "dur": 18.843, + "args": { + "External id": 992707,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 10690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942034472.159, "dur": 18.013, + "args": { + "External id": 992708,"Record function id": 0, "Ev Idx": 10691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942034476.346, "dur": 12.605, + "args": { + "External id": 992709,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942034481.446, "dur": 6.365, + "args": { + "External id": 992710,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942034483.399, "dur": 4.263, + "args": { + "External id": 992711,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942034494.981, "dur": 7.065, + "args": { + "External id": 992712,"Record function id": 0, "Ev Idx": 10695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942034496.952, "dur": 4.512, + "args": { + "External id": 992713,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942034497.720, "dur": 3.255, + "args": { + "External id": 992714,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942034499.235, "dur": 1.608, + "args": { + "External id": 992715,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942034506.029, "dur": 6.088, + "args": { + "External id": 992716,"Record function id": 0, "Ev Idx": 10699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942034507.859, "dur": 3.681, + "args": { + "External id": 992717,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942034508.468, "dur": 2.642, + "args": { + "External id": 992718,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942034509.786, "dur": 1.237, + "args": { + "External id": 992719,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942034515.962, "dur": 5.531, + "args": { + "External id": 992720,"Record function id": 0, "Ev Idx": 10703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942034517.783, "dur": 3.232, + "args": { + "External id": 992721,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942034518.372, "dur": 2.213, + "args": { + "External id": 992722,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942034519.354, "dur": 1.141, + "args": { + "External id": 992723,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942034525.111, "dur": 5.813, + "args": { + "External id": 992724,"Record function id": 0, "Ev Idx": 10707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942034526.674, "dur": 3.689, + "args": { + "External id": 992725,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942034527.439, "dur": 2.364, + "args": { + "External id": 992726,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942034528.297, "dur": 1.421, + "args": { + "External id": 992727,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942034534.565, "dur": 5.513, + "args": { + "External id": 992728,"Record function id": 0, "Ev Idx": 10711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942034536.003, "dur": 3.573, + "args": { + "External id": 992729,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942034536.737, "dur": 2.400, + "args": { + "External id": 992730,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942034537.502, "dur": 1.511, + "args": { + "External id": 992731,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942034543.845, "dur": 8.161, + "args": { + "External id": 992732,"Record function id": 0, "Ev Idx": 10715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942034545.539, "dur": 5.892, + "args": { + "External id": 992733,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942034546.246, "dur": 4.731, + "args": { + "External id": 992734,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942034547.412, "dur": 3.474, + "args": { + "External id": 992735,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942034555.973, "dur": 5.256, + "args": { + "External id": 992736,"Record function id": 0, "Ev Idx": 10719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942034557.235, "dur": 3.456, + "args": { + "External id": 992737,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942034557.877, "dur": 2.361, + "args": { + "External id": 992738,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942034558.728, "dur": 1.377, + "args": { + "External id": 992739,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942034565.359, "dur": 4.879, + "args": { + "External id": 992740,"Record function id": 0, "Ev Idx": 10723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942034566.682, "dur": 3.044, + "args": { + "External id": 992741,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942034567.283, "dur": 1.922, + "args": { + "External id": 992742,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942034568.013, "dur": 1.091, + "args": { + "External id": 992743,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345942034574.931, "dur": 73336.677, + "args": { + "External id": 992744,"Record function id": 0, "Sequence number": 10552484, "Fwd thread id": 1, "Ev Idx": 10727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345942034576.594, "dur": 73324.573, + "args": { + "External id": 992745,"Sequence number": 10552484, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10728 + } + }, + { + "ph": "f", "id": 435, "pid": 2338710, "tid": 2379450, "ts": 6345942034576.594, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.6)", "pid": 2338710, "tid": 2379450, + "ts": 6345942034611.074, "dur": 45.073, + "args": { + "External id": 992746,"Record function id": 0, "Ev Idx": 10729 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.6)", "pid": 2338710, "tid": 2379450, + "ts": 6345942034664.949, "dur": 79.044, + "args": { + "External id": 992747,"Record function id": 0, "Ev Idx": 10730 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.6)", "pid": 2338710, "tid": 2379450, + "ts": 6345942034751.589, "dur": 73139.068, + "args": { + "External id": 992748,"Record function id": 0, "Ev Idx": 10731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942034857.154, "dur": 8.261, + "args": { + "External id": 992749,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942034876.463, "dur": 5.511, + "args": { + "External id": 992750,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345942034897.706, "dur": 71698.804, + "args": { + "External id": 992751,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345942034913.698, "dur": 71666.442, + "args": { + "External id": 992752,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942035150.433, "dur": 30.031, + "args": { + "External id": 992753,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345942035205.044, "dur": 71318.939, + "args": { + "External id": 992754,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 10737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345942035208.608, "dur": 71314.260, + "args": { + "External id": 992755,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 10738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942035214.324, "dur": 13.801, + "args": { + "External id": 992756,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345942035234.269, "dur": 71281.544, + "args": { + "External id": 992757,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 10740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942106737.449, "dur": 16.583, + "args": { + "External id": 992758,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 10741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942106742.380, "dur": 11.160, + "args": { + "External id": 992759,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345942106798.236, "dur": 571.670, + "args": { + "External id": 992760,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 10743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345942106838.978, "dur": 522.840, + "args": { + "External id": 992761,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10744, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345942106853.851, "dur": 499.443, + "args": { + "External id": 992762,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 10745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345942107401.833, "dur": 2.564, + "args": { + "External id": 992763,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10746, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942107481.247, "dur": 9.556, + "args": { + "External id": 992764,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942107509.093, "dur": 42.365, + "args": { + "External id": 992765,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942107565.543, "dur": 4.765, + "args": { + "External id": 992766,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942107576.673, "dur": 16.447, + "args": { + "External id": 992767,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942107599.880, "dur": 0.975, + "args": { + "External id": 992768,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942107643.300, "dur": 17.430, + "args": { + "External id": 992769,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942107671.809, "dur": 0.943, + "args": { + "External id": 992770,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942107678.659, "dur": 14.613, + "args": { + "External id": 992771,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942107698.592, "dur": 0.991, + "args": { + "External id": 992772,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942107705.458, "dur": 15.172, + "args": { + "External id": 992773,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942107725.350, "dur": 1.215, + "args": { + "External id": 992774,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942107730.817, "dur": 14.121, + "args": { + "External id": 992775,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942107751.619, "dur": 0.870, + "args": { + "External id": 992776,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942107757.913, "dur": 15.137, + "args": { + "External id": 992777,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942107777.777, "dur": 0.819, + "args": { + "External id": 992778,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942107782.732, "dur": 13.889, + "args": { + "External id": 992779,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942107801.678, "dur": 0.778, + "args": { + "External id": 992780,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942107805.831, "dur": 13.872, + "args": { + "External id": 992781,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 10764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345942107931.498, "dur": 3469.304, + "args": { + "External id": 992782,"Record function id": 0, "Ev Idx": 10765 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.5)", "pid": 2338710, "tid": 2379450, + "ts": 6345942107954.400, "dur": 1295.537, + "args": { + "External id": 992783,"Record function id": 0, "Ev Idx": 10766 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.5)", "pid": 2338710, "tid": 2379450, + "ts": 6345942107972.055, "dur": 446.611, + "args": { + "External id": 992784,"Record function id": 0, "Ev Idx": 10767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942108131.951, "dur": 9.515, + "args": { + "External id": 992785,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942108145.480, "dur": 0.940, + "args": { + "External id": 992786,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942108148.987, "dur": 0.892, + "args": { + "External id": 992787,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942108154.192, "dur": 0.965, + "args": { + "External id": 992788,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942108156.775, "dur": 1.258, + "args": { + "External id": 992789,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942108159.672, "dur": 0.968, + "args": { + "External id": 992790,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942108162.606, "dur": 0.865, + "args": { + "External id": 992791,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942108167.521, "dur": 2.128, + "args": { + "External id": 992792,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942108171.275, "dur": 3.179, + "args": { + "External id": 992793,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942108176.018, "dur": 0.697, + "args": { + "External id": 992794,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345942108199.047, "dur": 181.694, + "args": { + "External id": 992795,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345942108219.873, "dur": 155.387, + "args": { + "External id": 992796,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942108238.219, "dur": 20.333, + "args": { + "External id": 992797,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345942108263.922, "dur": 82.486, + "args": { + "External id": 992798,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 10781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345942108271.032, "dur": 74.913, + "args": { + "External id": 992799,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 10782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942108275.471, "dur": 6.971, + "args": { + "External id": 992800,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345942108284.351, "dur": 60.594, + "args": { + "External id": 992801,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 10784 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.4", "pid": 2338710, "tid": 2379450, + "ts": 6345942108520.343, "dur": 721.014, + "args": { + "External id": 992802,"Record function id": 0, "Ev Idx": 10785 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.4)", "pid": 2338710, "tid": 2379450, + "ts": 6345942108541.550, "dur": 684.757, + "args": { + "External id": 992803,"Record function id": 0, "Ev Idx": 10786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942108610.340, "dur": 7.120, + "args": { + "External id": 992804,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345942108634.939, "dur": 33.172, + "args": { + "External id": 992805,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942108641.443, "dur": 1.859, + "args": { + "External id": 992806,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942108645.287, "dur": 0.597, + "args": { + "External id": 992807,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942108648.152, "dur": 0.638, + "args": { + "External id": 992808,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942108650.855, "dur": 3.197, + "args": { + "External id": 992809,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942108655.459, "dur": 0.686, + "args": { + "External id": 992810,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942108658.072, "dur": 0.572, + "args": { + "External id": 992811,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942108660.332, "dur": 0.360, + "args": { + "External id": 992812,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942108661.907, "dur": 0.448, + "args": { + "External id": 992813,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942108664.207, "dur": 0.397, + "args": { + "External id": 992814,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345942108679.247, "dur": 46.595, + "args": { + "External id": 992815,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345942108761.658, "dur": 121.249, + "args": { + "External id": 992816,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 10799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942108773.630, "dur": 3.422, + "args": { + "External id": 992817,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345942108782.878, "dur": 11.930, + "args": { + "External id": 992818,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345942108788.192, "dur": 6.110, + "args": { + "External id": 992819,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 10802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942108791.980, "dur": 0.846, + "args": { + "External id": 992820,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345942108802.201, "dur": 29.275, + "args": { + "External id": 992821,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942108803.967, "dur": 2.817, + "args": { + "External id": 992822,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942108809.060, "dur": 0.575, + "args": { + "External id": 992823,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942108811.678, "dur": 0.371, + "args": { + "External id": 992824,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942108813.263, "dur": 0.533, + "args": { + "External id": 992825,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942108815.514, "dur": 0.529, + "args": { + "External id": 992826,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942108817.966, "dur": 0.305, + "args": { + "External id": 992827,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942108819.720, "dur": 0.483, + "args": { + "External id": 992828,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942108822.425, "dur": 0.498, + "args": { + "External id": 992829,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942108824.488, "dur": 2.983, + "args": { + "External id": 992830,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345942108842.769, "dur": 32.108, + "args": { + "External id": 992831,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345942108933.809, "dur": 203.904, + "args": { + "External id": 992832,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 10815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345942108961.482, "dur": 171.480, + "args": { + "External id": 992833,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10816, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345942108973.140, "dur": 154.539, + "args": { + "External id": 992834,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 10817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345942109158.124, "dur": 2.663, + "args": { + "External id": 992835,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10818, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345942109258.730, "dur": 2114.691, + "args": { + "External id": 992836,"Sequence number": 10552483, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10819 + } + }, + { + "ph": "f", "id": 436, "pid": 2338710, "tid": 2379450, "ts": 6345942109258.730, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942109390.247, "dur": 131.423, + "args": { + "External id": 992837,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 10820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345942109571.382, "dur": 46.086, + "args": { + "External id": 992838,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 10821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345942109639.163, "dur": 59.017, + "args": { + "External id": 992839,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 10822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942109710.304, "dur": 36.967, + "args": { + "External id": 992840,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942109754.608, "dur": 37.688, + "args": { + "External id": 992841,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942109800.306, "dur": 32.118, + "args": { + "External id": 992842,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942109846.123, "dur": 33.220, + "args": { + "External id": 992843,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345942109912.489, "dur": 27.641, + "args": { + "External id": 992844,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 10827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345942109965.109, "dur": 35.475, + "args": { + "External id": 992845,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345942110105.126, "dur": 33.230, + "args": { + "External id": 992846,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345942110157.200, "dur": 20.564, + "args": { + "External id": 992847,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942110192.080, "dur": 54.572, + "args": { + "External id": 992848,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942110251.511, "dur": 37.765, + "args": { + "External id": 992849,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345942110340.928, "dur": 296.792, + "args": { + "External id": 992850,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 10833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942110439.624, "dur": 7.491, + "args": { + "External id": 992851,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942110450.149, "dur": 2.822, + "args": { + "External id": 992852,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942110454.664, "dur": 2.820, + "args": { + "External id": 992853,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942110458.802, "dur": 5.227, + "args": { + "External id": 992854,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345942110522.819, "dur": 6.019, + "args": { + "External id": 992855,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942110524.931, "dur": 3.653, + "args": { + "External id": 992856,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345942110531.001, "dur": 36.697, + "args": { + "External id": 992857,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942110537.478, "dur": 2.174, + "args": { + "External id": 992858,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345942110569.938, "dur": 1.967, + "args": { + "External id": 992859,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942110571.098, "dur": 0.669, + "args": { + "External id": 992860,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345942110573.313, "dur": 16.615, + "args": { + "External id": 992861,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942110575.575, "dur": 0.731, + "args": { + "External id": 992862,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345942110676.408, "dur": 31.749, + "args": { + "External id": 992863,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345942110737.750, "dur": 18.981, + "args": { + "External id": 992864,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942110766.514, "dur": 47.881, + "args": { + "External id": 992865,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942110823.286, "dur": 44.760, + "args": { + "External id": 992866,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942110878.352, "dur": 25.184, + "args": { + "External id": 992867,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942110913.193, "dur": 39.743, + "args": { + "External id": 992868,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942110961.328, "dur": 31.977, + "args": { + "External id": 992869,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942111004.888, "dur": 104.154, + "args": { + "External id": 992870,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345942111141.055, "dur": 31.185, + "args": { + "External id": 992871,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 10854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345942111191.159, "dur": 30.614, + "args": { + "External id": 992872,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345942111242.020, "dur": 20.651, + "args": { + "External id": 992873,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345942111280.303, "dur": 20.347, + "args": { + "External id": 992874,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345942111313.716, "dur": 19.560, + "args": { + "External id": 992875,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 10858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942111427.225, "dur": 18.543, + "args": { + "External id": 992876,"Record function id": 0, "Ev Idx": 10859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942111431.885, "dur": 12.869, + "args": { + "External id": 992877,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942111436.868, "dur": 6.760, + "args": { + "External id": 992878,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942111438.734, "dur": 4.754, + "args": { + "External id": 992879,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942111450.744, "dur": 5.981, + "args": { + "External id": 992880,"Record function id": 0, "Ev Idx": 10863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942111452.564, "dur": 3.557, + "args": { + "External id": 992881,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942111453.476, "dur": 2.105, + "args": { + "External id": 992882,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942111454.288, "dur": 1.153, + "args": { + "External id": 992883,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942111460.749, "dur": 5.142, + "args": { + "External id": 992884,"Record function id": 0, "Ev Idx": 10867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942111462.025, "dur": 3.402, + "args": { + "External id": 992885,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942111462.728, "dur": 2.218, + "args": { + "External id": 992886,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942111463.574, "dur": 1.286, + "args": { + "External id": 992887,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942111469.628, "dur": 4.513, + "args": { + "External id": 992888,"Record function id": 0, "Ev Idx": 10871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942111471.097, "dur": 2.536, + "args": { + "External id": 992889,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942111471.750, "dur": 1.407, + "args": { + "External id": 992890,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942111472.167, "dur": 0.905, + "args": { + "External id": 992891,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942111477.742, "dur": 4.399, + "args": { + "External id": 992892,"Record function id": 0, "Ev Idx": 10875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942111479.170, "dur": 2.496, + "args": { + "External id": 992893,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942111479.797, "dur": 1.339, + "args": { + "External id": 992894,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942111480.167, "dur": 0.883, + "args": { + "External id": 992895,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942111485.847, "dur": 7.067, + "args": { + "External id": 992896,"Record function id": 0, "Ev Idx": 10879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942111487.404, "dur": 5.010, + "args": { + "External id": 992897,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942111488.211, "dur": 3.691, + "args": { + "External id": 992898,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942111488.529, "dur": 3.295, + "args": { + "External id": 992899,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942111496.790, "dur": 4.383, + "args": { + "External id": 992900,"Record function id": 0, "Ev Idx": 10883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942111498.241, "dur": 2.467, + "args": { + "External id": 992901,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942111498.873, "dur": 1.365, + "args": { + "External id": 992902,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942111499.340, "dur": 0.784, + "args": { + "External id": 992903,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942111505.435, "dur": 4.566, + "args": { + "External id": 992904,"Record function id": 0, "Ev Idx": 10887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942111506.781, "dur": 2.705, + "args": { + "External id": 992905,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942111507.490, "dur": 1.522, + "args": { + "External id": 992906,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942111508.028, "dur": 0.854, + "args": { + "External id": 992907,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942111514.170, "dur": 4.353, + "args": { + "External id": 992908,"Record function id": 0, "Ev Idx": 10891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942111515.368, "dur": 2.681, + "args": { + "External id": 992909,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942111516.015, "dur": 1.512, + "args": { + "External id": 992910,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942111516.567, "dur": 0.857, + "args": { + "External id": 992911,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345942111523.330, "dur": 62454.147, + "args": { + "External id": 992912,"Record function id": 0, "Sequence number": 10552482, "Fwd thread id": 1, "Ev Idx": 10895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345942111524.958, "dur": 62441.341, + "args": { + "External id": 992913,"Sequence number": 10552482, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10896 + } + }, + { + "ph": "f", "id": 437, "pid": 2338710, "tid": 2379450, "ts": 6345942111524.958, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.5)", "pid": 2338710, "tid": 2379450, + "ts": 6345942111559.325, "dur": 49.164, + "args": { + "External id": 992914,"Record function id": 0, "Ev Idx": 10897 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.5)", "pid": 2338710, "tid": 2379450, + "ts": 6345942111617.686, "dur": 77.202, + "args": { + "External id": 992915,"Record function id": 0, "Ev Idx": 10898 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.5)", "pid": 2338710, "tid": 2379450, + "ts": 6345942111701.578, "dur": 62255.300, + "args": { + "External id": 992916,"Record function id": 0, "Ev Idx": 10899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942111806.096, "dur": 7.724, + "args": { + "External id": 992917,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942111824.795, "dur": 5.343, + "args": { + "External id": 992918,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345942111845.935, "dur": 60941.187, + "args": { + "External id": 992919,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345942111861.931, "dur": 60908.665, + "args": { + "External id": 992920,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942111985.332, "dur": 45.491, + "args": { + "External id": 992921,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345942112098.310, "dur": 60619.643, + "args": { + "External id": 992922,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 10905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345942112102.505, "dur": 60613.502, + "args": { + "External id": 992923,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 10906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942112109.216, "dur": 23.060, + "args": { + "External id": 992924,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345942112139.140, "dur": 60573.268, + "args": { + "External id": 992925,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 10908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942172936.899, "dur": 16.630, + "args": { + "External id": 992926,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 10909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942172942.089, "dur": 10.827, + "args": { + "External id": 992927,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345942172996.201, "dur": 495.857, + "args": { + "External id": 992928,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 10911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345942173087.442, "dur": 396.802, + "args": { + "External id": 992929,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10912, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345942173106.501, "dur": 369.115, + "args": { + "External id": 992930,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 10913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345942173520.264, "dur": 2.449, + "args": { + "External id": 992931,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10914, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942173598.804, "dur": 9.159, + "args": { + "External id": 992932,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942173626.266, "dur": 44.601, + "args": { + "External id": 992933,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942173684.111, "dur": 4.494, + "args": { + "External id": 992934,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942173695.133, "dur": 16.185, + "args": { + "External id": 992935,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942173718.039, "dur": 0.984, + "args": { + "External id": 992936,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942173725.369, "dur": 14.295, + "args": { + "External id": 992937,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942173745.533, "dur": 1.046, + "args": { + "External id": 992938,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942173752.655, "dur": 13.655, + "args": { + "External id": 992939,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942173770.826, "dur": 0.888, + "args": { + "External id": 992940,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942173776.985, "dur": 13.055, + "args": { + "External id": 992941,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942173794.845, "dur": 1.151, + "args": { + "External id": 992942,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942173800.629, "dur": 13.017, + "args": { + "External id": 992943,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942173818.434, "dur": 0.891, + "args": { + "External id": 992944,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942173826.033, "dur": 13.835, + "args": { + "External id": 992945,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942173844.313, "dur": 0.982, + "args": { + "External id": 992946,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942173849.551, "dur": 12.818, + "args": { + "External id": 992947,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942173866.851, "dur": 0.784, + "args": { + "External id": 992948,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942173872.065, "dur": 13.152, + "args": { + "External id": 992949,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 10932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345942173996.127, "dur": 3533.903, + "args": { + "External id": 992950,"Record function id": 0, "Ev Idx": 10933 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.4)", "pid": 2338710, "tid": 2379450, + "ts": 6345942174042.941, "dur": 1355.333, + "args": { + "External id": 992951,"Record function id": 0, "Ev Idx": 10934 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.4)", "pid": 2338710, "tid": 2379450, + "ts": 6345942174106.309, "dur": 418.584, + "args": { + "External id": 992952,"Record function id": 0, "Ev Idx": 10935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942174218.957, "dur": 9.704, + "args": { + "External id": 992953,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942174233.909, "dur": 1.154, + "args": { + "External id": 992954,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942174237.169, "dur": 1.030, + "args": { + "External id": 992955,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942174240.313, "dur": 0.963, + "args": { + "External id": 992956,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942174243.153, "dur": 1.240, + "args": { + "External id": 992957,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942174248.450, "dur": 0.886, + "args": { + "External id": 992958,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942174251.044, "dur": 1.195, + "args": { + "External id": 992959,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942174253.774, "dur": 1.572, + "args": { + "External id": 992960,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942174257.163, "dur": 2.903, + "args": { + "External id": 992961,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942174263.913, "dur": 0.683, + "args": { + "External id": 992962,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345942174285.512, "dur": 199.119, + "args": { + "External id": 992963,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345942174308.019, "dur": 169.813, + "args": { + "External id": 992964,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942174326.270, "dur": 21.338, + "args": { + "External id": 992965,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345942174353.693, "dur": 86.992, + "args": { + "External id": 992966,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 10949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345942174357.095, "dur": 83.157, + "args": { + "External id": 992967,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 10950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942174363.246, "dur": 7.391, + "args": { + "External id": 992968,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345942174372.950, "dur": 66.437, + "args": { + "External id": 992969,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 10952 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.3", "pid": 2338710, "tid": 2379450, + "ts": 6345942174629.222, "dur": 760.453, + "args": { + "External id": 992970,"Record function id": 0, "Ev Idx": 10953 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.3)", "pid": 2338710, "tid": 2379450, + "ts": 6345942174647.470, "dur": 727.394, + "args": { + "External id": 992971,"Record function id": 0, "Ev Idx": 10954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942174717.176, "dur": 6.979, + "args": { + "External id": 992972,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345942174742.239, "dur": 35.592, + "args": { + "External id": 992973,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942174748.586, "dur": 2.135, + "args": { + "External id": 992974,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942174753.083, "dur": 0.603, + "args": { + "External id": 992975,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942174759.167, "dur": 0.501, + "args": { + "External id": 992976,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942174761.740, "dur": 2.386, + "args": { + "External id": 992977,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942174765.558, "dur": 0.405, + "args": { + "External id": 992978,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942174768.362, "dur": 0.423, + "args": { + "External id": 992979,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942174770.555, "dur": 0.300, + "args": { + "External id": 992980,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942174772.019, "dur": 0.447, + "args": { + "External id": 992981,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942174774.277, "dur": 0.469, + "args": { + "External id": 992982,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345942174789.940, "dur": 56.579, + "args": { + "External id": 992983,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345942174882.341, "dur": 155.726, + "args": { + "External id": 992984,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 10967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942174893.932, "dur": 4.422, + "args": { + "External id": 992985,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345942174904.049, "dur": 11.331, + "args": { + "External id": 992986,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345942174909.087, "dur": 5.854, + "args": { + "External id": 992987,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 10970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942174912.804, "dur": 0.747, + "args": { + "External id": 992988,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345942174923.165, "dur": 35.440, + "args": { + "External id": 992989,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942174925.241, "dur": 3.170, + "args": { + "External id": 992990,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942174930.694, "dur": 0.449, + "args": { + "External id": 992991,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942174932.828, "dur": 0.867, + "args": { + "External id": 992992,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942174935.180, "dur": 0.614, + "args": { + "External id": 992993,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942174937.766, "dur": 0.500, + "args": { + "External id": 992994,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942174940.195, "dur": 0.433, + "args": { + "External id": 992995,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942174946.439, "dur": 0.459, + "args": { + "External id": 992996,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942174948.684, "dur": 0.490, + "args": { + "External id": 992997,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942174950.192, "dur": 2.655, + "args": { + "External id": 992998,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345942174969.835, "dur": 36.823, + "args": { + "External id": 992999,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345942175140.453, "dur": 148.736, + "args": { + "External id": 993000,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 10983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345942175174.212, "dur": 110.749, + "args": { + "External id": 993001,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10984, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345942175186.232, "dur": 93.467, + "args": { + "External id": 993002,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 10985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345942175309.596, "dur": 2.029, + "args": { + "External id": 993003,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10986, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345942175407.357, "dur": 2098.202, + "args": { + "External id": 993004,"Sequence number": 10552481, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10987 + } + }, + { + "ph": "f", "id": 438, "pid": 2338710, "tid": 2379450, "ts": 6345942175407.357, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942175539.197, "dur": 127.786, + "args": { + "External id": 993005,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 10988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345942175715.405, "dur": 48.341, + "args": { + "External id": 993006,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 10989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345942175785.284, "dur": 58.058, + "args": { + "External id": 993007,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 10990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942175859.203, "dur": 36.868, + "args": { + "External id": 993008,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942175903.536, "dur": 37.569, + "args": { + "External id": 993009,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942175948.606, "dur": 31.770, + "args": { + "External id": 993010,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942175990.389, "dur": 56.561, + "args": { + "External id": 993011,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345942176127.989, "dur": 32.850, + "args": { + "External id": 993012,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 10995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345942176184.081, "dur": 35.248, + "args": { + "External id": 993013,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345942176245.465, "dur": 24.268, + "args": { + "External id": 993014,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345942176284.393, "dur": 18.537, + "args": { + "External id": 993015,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942176315.195, "dur": 69.230, + "args": { + "External id": 993016,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942176395.961, "dur": 49.820, + "args": { + "External id": 993017,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345942176486.064, "dur": 301.890, + "args": { + "External id": 993018,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 11001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942176584.197, "dur": 11.003, + "args": { + "External id": 993019,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942176597.860, "dur": 3.697, + "args": { + "External id": 993020,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942176603.247, "dur": 2.304, + "args": { + "External id": 993021,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942176606.762, "dur": 4.109, + "args": { + "External id": 993022,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345942176663.574, "dur": 6.087, + "args": { + "External id": 993023,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942176665.961, "dur": 3.441, + "args": { + "External id": 993024,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345942176671.710, "dur": 41.877, + "args": { + "External id": 993025,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942176678.101, "dur": 1.939, + "args": { + "External id": 993026,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345942176715.467, "dur": 2.002, + "args": { + "External id": 993027,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942176716.794, "dur": 0.599, + "args": { + "External id": 993028,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345942176718.714, "dur": 18.811, + "args": { + "External id": 993029,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942176721.477, "dur": 0.612, + "args": { + "External id": 993030,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345942176826.454, "dur": 34.127, + "args": { + "External id": 993031,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345942176878.358, "dur": 19.942, + "args": { + "External id": 993032,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942176907.524, "dur": 45.290, + "args": { + "External id": 993033,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942176960.428, "dur": 66.823, + "args": { + "External id": 993034,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942177043.844, "dur": 74.162, + "args": { + "External id": 993035,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942177131.080, "dur": 41.288, + "args": { + "External id": 993036,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942177181.536, "dur": 32.992, + "args": { + "External id": 993037,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 11020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942177222.519, "dur": 35.596, + "args": { + "External id": 993038,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345942177284.273, "dur": 31.666, + "args": { + "External id": 993039,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 11022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345942177337.693, "dur": 28.520, + "args": { + "External id": 993040,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345942177381.409, "dur": 20.276, + "args": { + "External id": 993041,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 11024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345942177416.348, "dur": 17.977, + "args": { + "External id": 993042,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 11025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345942177448.724, "dur": 18.375, + "args": { + "External id": 993043,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 11026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942177556.846, "dur": 19.270, + "args": { + "External id": 993044,"Record function id": 0, "Ev Idx": 11027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942177561.565, "dur": 13.335, + "args": { + "External id": 993045,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942177566.710, "dur": 6.959, + "args": { + "External id": 993046,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942177568.937, "dur": 4.593, + "args": { + "External id": 993047,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942177580.840, "dur": 6.355, + "args": { + "External id": 993048,"Record function id": 0, "Ev Idx": 11031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942177582.947, "dur": 3.662, + "args": { + "External id": 993049,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942177583.673, "dur": 2.367, + "args": { + "External id": 993050,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942177584.569, "dur": 1.357, + "args": { + "External id": 993051,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942177590.990, "dur": 5.546, + "args": { + "External id": 993052,"Record function id": 0, "Ev Idx": 11035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942177592.390, "dur": 3.650, + "args": { + "External id": 993053,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942177593.108, "dur": 2.422, + "args": { + "External id": 993054,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942177593.914, "dur": 1.492, + "args": { + "External id": 993055,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942177600.232, "dur": 5.491, + "args": { + "External id": 993056,"Record function id": 0, "Ev Idx": 11039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942177602.085, "dur": 3.140, + "args": { + "External id": 993057,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942177602.733, "dur": 1.964, + "args": { + "External id": 993058,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942177603.679, "dur": 0.901, + "args": { + "External id": 993059,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942177609.443, "dur": 4.919, + "args": { + "External id": 993060,"Record function id": 0, "Ev Idx": 11043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942177610.838, "dur": 3.006, + "args": { + "External id": 993061,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942177611.616, "dur": 1.664, + "args": { + "External id": 993062,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942177612.260, "dur": 0.918, + "args": { + "External id": 993063,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942177618.023, "dur": 5.045, + "args": { + "External id": 993064,"Record function id": 0, "Ev Idx": 11047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942177619.548, "dur": 2.967, + "args": { + "External id": 993065,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942177620.144, "dur": 1.826, + "args": { + "External id": 993066,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942177620.897, "dur": 0.913, + "args": { + "External id": 993067,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942177626.819, "dur": 6.697, + "args": { + "External id": 993068,"Record function id": 0, "Ev Idx": 11051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942177628.183, "dur": 4.837, + "args": { + "External id": 993069,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942177629.041, "dur": 3.502, + "args": { + "External id": 993070,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942177629.563, "dur": 2.893, + "args": { + "External id": 993071,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942177637.790, "dur": 4.266, + "args": { + "External id": 993072,"Record function id": 0, "Ev Idx": 11055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942177639.111, "dur": 2.448, + "args": { + "External id": 993073,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942177639.678, "dur": 1.252, + "args": { + "External id": 993074,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942177640.041, "dur": 0.774, + "args": { + "External id": 993075,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942177646.303, "dur": 4.210, + "args": { + "External id": 993076,"Record function id": 0, "Ev Idx": 11059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942177647.538, "dur": 2.482, + "args": { + "External id": 993077,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942177648.086, "dur": 1.324, + "args": { + "External id": 993078,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942177648.555, "dur": 0.765, + "args": { + "External id": 993079,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345942177655.256, "dur": 60787.206, + "args": { + "External id": 993080,"Record function id": 0, "Sequence number": 10552480, "Fwd thread id": 1, "Ev Idx": 11063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345942177656.924, "dur": 60774.828, + "args": { + "External id": 993081,"Sequence number": 10552480, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11064 + } + }, + { + "ph": "f", "id": 439, "pid": 2338710, "tid": 2379450, "ts": 6345942177656.924, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.4)", "pid": 2338710, "tid": 2379450, + "ts": 6345942177690.635, "dur": 44.390, + "args": { + "External id": 993082,"Record function id": 0, "Ev Idx": 11065 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.4)", "pid": 2338710, "tid": 2379450, + "ts": 6345942177743.406, "dur": 72.211, + "args": { + "External id": 993083,"Record function id": 0, "Ev Idx": 11066 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.4)", "pid": 2338710, "tid": 2379450, + "ts": 6345942177822.495, "dur": 60597.268, + "args": { + "External id": 993084,"Record function id": 0, "Ev Idx": 11067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942177929.334, "dur": 7.786, + "args": { + "External id": 993085,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942177948.754, "dur": 5.001, + "args": { + "External id": 993086,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 11069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345942177968.886, "dur": 59265.509, + "args": { + "External id": 993087,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345942177985.646, "dur": 59233.349, + "args": { + "External id": 993088,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942178192.069, "dur": 27.762, + "args": { + "External id": 993089,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345942178249.132, "dur": 58919.773, + "args": { + "External id": 993090,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 11073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345942178252.646, "dur": 58914.689, + "args": { + "External id": 993091,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 11074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942178258.825, "dur": 12.837, + "args": { + "External id": 993092,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345942178276.338, "dur": 58886.058, + "args": { + "External id": 993093,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 11076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942237366.952, "dur": 14.046, + "args": { + "External id": 993094,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 11077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942237371.185, "dur": 9.123, + "args": { + "External id": 993095,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345942237416.915, "dur": 423.200, + "args": { + "External id": 993096,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 11079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345942237451.082, "dur": 382.612, + "args": { + "External id": 993097,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11080, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345942237468.053, "dur": 358.255, + "args": { + "External id": 993098,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 11081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345942237865.977, "dur": 2.430, + "args": { + "External id": 993099,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11082, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942237945.742, "dur": 8.832, + "args": { + "External id": 993100,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942237971.864, "dur": 66.118, + "args": { + "External id": 993101,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942238092.431, "dur": 4.489, + "args": { + "External id": 993102,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942238107.242, "dur": 22.322, + "args": { + "External id": 993103,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942238139.028, "dur": 4.166, + "args": { + "External id": 993104,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942238150.189, "dur": 15.191, + "args": { + "External id": 993105,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942238172.462, "dur": 1.069, + "args": { + "External id": 993106,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942238180.510, "dur": 13.224, + "args": { + "External id": 993107,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942238199.070, "dur": 0.825, + "args": { + "External id": 993108,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942238205.381, "dur": 15.832, + "args": { + "External id": 993109,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942238227.271, "dur": 1.047, + "args": { + "External id": 993110,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942238234.745, "dur": 14.503, + "args": { + "External id": 993111,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942238255.865, "dur": 1.016, + "args": { + "External id": 993112,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942238262.912, "dur": 15.882, + "args": { + "External id": 993113,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942238286.938, "dur": 0.995, + "args": { + "External id": 993114,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942238293.378, "dur": 14.040, + "args": { + "External id": 993115,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942238312.382, "dur": 0.921, + "args": { + "External id": 993116,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942238318.451, "dur": 15.935, + "args": { + "External id": 993117,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 11100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345942238465.184, "dur": 3466.141, + "args": { + "External id": 993118,"Record function id": 0, "Ev Idx": 11101 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.3)", "pid": 2338710, "tid": 2379450, + "ts": 6345942238492.696, "dur": 1300.745, + "args": { + "External id": 993119,"Record function id": 0, "Ev Idx": 11102 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.3)", "pid": 2338710, "tid": 2379450, + "ts": 6345942238514.136, "dur": 405.525, + "args": { + "External id": 993120,"Record function id": 0, "Ev Idx": 11103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942238627.711, "dur": 6.016, + "args": { + "External id": 993121,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 11104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942238637.196, "dur": 3.737, + "args": { + "External id": 993122,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942238643.176, "dur": 1.239, + "args": { + "External id": 993123,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 11106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942238646.890, "dur": 0.824, + "args": { + "External id": 993124,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 11107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942238649.689, "dur": 1.023, + "args": { + "External id": 993125,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 11108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942238654.303, "dur": 0.982, + "args": { + "External id": 993126,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 11109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942238657.207, "dur": 1.203, + "args": { + "External id": 993127,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942238660.155, "dur": 1.745, + "args": { + "External id": 993128,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942238663.592, "dur": 0.808, + "args": { + "External id": 993129,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942238667.579, "dur": 2.967, + "args": { + "External id": 993130,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345942238691.943, "dur": 187.744, + "args": { + "External id": 993131,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 11114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345942238713.696, "dur": 159.293, + "args": { + "External id": 993132,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 11115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942238731.649, "dur": 18.852, + "args": { + "External id": 993133,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345942238756.310, "dur": 81.414, + "args": { + "External id": 993134,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 11117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345942238759.410, "dur": 77.910, + "args": { + "External id": 993135,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 11118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942238765.096, "dur": 5.463, + "args": { + "External id": 993136,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345942238772.431, "dur": 59.765, + "args": { + "External id": 993137,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 11120 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.2", "pid": 2338710, "tid": 2379450, + "ts": 6345942239033.849, "dur": 751.239, + "args": { + "External id": 993138,"Record function id": 0, "Ev Idx": 11121 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.2)", "pid": 2338710, "tid": 2379450, + "ts": 6345942239098.020, "dur": 673.239, + "args": { + "External id": 993139,"Record function id": 0, "Ev Idx": 11122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942239179.313, "dur": 9.056, + "args": { + "External id": 993140,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345942239207.481, "dur": 40.174, + "args": { + "External id": 993141,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 11124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942239214.835, "dur": 2.427, + "args": { + "External id": 993142,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942239221.340, "dur": 0.973, + "args": { + "External id": 993143,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942239227.510, "dur": 0.824, + "args": { + "External id": 993144,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942239230.071, "dur": 0.780, + "args": { + "External id": 993145,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942239232.520, "dur": 2.530, + "args": { + "External id": 993146,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942239236.245, "dur": 0.405, + "args": { + "External id": 993147,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942239238.618, "dur": 0.459, + "args": { + "External id": 993148,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942239240.929, "dur": 0.435, + "args": { + "External id": 993149,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942239242.578, "dur": 0.617, + "args": { + "External id": 993150,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345942239260.469, "dur": 56.480, + "args": { + "External id": 993151,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 11134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345942239357.752, "dur": 137.360, + "args": { + "External id": 993152,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 11135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942239370.816, "dur": 3.920, + "args": { + "External id": 993153,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345942239381.034, "dur": 13.800, + "args": { + "External id": 993154,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 11137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345942239387.192, "dur": 7.143, + "args": { + "External id": 993155,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 11138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942239391.067, "dur": 1.844, + "args": { + "External id": 993156,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 11139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345942239403.335, "dur": 28.940, + "args": { + "External id": 993157,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 11140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942239406.374, "dur": 0.636, + "args": { + "External id": 993158,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942239408.751, "dur": 2.887, + "args": { + "External id": 993159,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942239413.666, "dur": 0.373, + "args": { + "External id": 993160,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942239415.738, "dur": 0.400, + "args": { + "External id": 993161,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942239417.352, "dur": 0.660, + "args": { + "External id": 993162,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942239420.025, "dur": 0.272, + "args": { + "External id": 993163,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942239421.609, "dur": 0.585, + "args": { + "External id": 993164,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942239423.570, "dur": 0.525, + "args": { + "External id": 993165,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942239425.779, "dur": 0.538, + "args": { + "External id": 993166,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345942239446.136, "dur": 38.686, + "args": { + "External id": 993167,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 11150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345942239548.552, "dur": 141.207, + "args": { + "External id": 993168,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 11151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345942239577.735, "dur": 107.704, + "args": { + "External id": 993169,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11152, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345942239589.251, "dur": 90.933, + "args": { + "External id": 993170,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 11153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345942239706.844, "dur": 2.132, + "args": { + "External id": 993171,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11154, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345942239801.987, "dur": 2104.635, + "args": { + "External id": 993172,"Sequence number": 10552479, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11155 + } + }, + { + "ph": "f", "id": 440, "pid": 2338710, "tid": 2379450, "ts": 6345942239801.987, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942239927.404, "dur": 197.841, + "args": { + "External id": 993173,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 11156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345942240182.845, "dur": 48.391, + "args": { + "External id": 993174,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 11157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345942240255.868, "dur": 68.411, + "args": { + "External id": 993175,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 11158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942240335.480, "dur": 40.845, + "args": { + "External id": 993176,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 11159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942240383.821, "dur": 38.217, + "args": { + "External id": 993177,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 11160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942240429.349, "dur": 35.330, + "args": { + "External id": 993178,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 11161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942240474.808, "dur": 34.622, + "args": { + "External id": 993179,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 11162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345942240541.842, "dur": 29.840, + "args": { + "External id": 993180,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 11163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345942240592.715, "dur": 32.926, + "args": { + "External id": 993181,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345942240649.715, "dur": 30.245, + "args": { + "External id": 993182,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 11165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345942240696.090, "dur": 18.755, + "args": { + "External id": 993183,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 11166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942240726.991, "dur": 45.123, + "args": { + "External id": 993184,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 11167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942240776.273, "dur": 58.967, + "args": { + "External id": 993185,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345942240881.131, "dur": 378.440, + "args": { + "External id": 993186,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 11169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942240980.490, "dur": 10.480, + "args": { + "External id": 993187,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942240999.026, "dur": 2.879, + "args": { + "External id": 993188,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942241003.276, "dur": 2.747, + "args": { + "External id": 993189,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942241023.370, "dur": 7.358, + "args": { + "External id": 993190,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345942241133.384, "dur": 6.491, + "args": { + "External id": 993191,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942241135.740, "dur": 3.614, + "args": { + "External id": 993192,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345942241142.071, "dur": 39.465, + "args": { + "External id": 993193,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942241148.391, "dur": 2.132, + "args": { + "External id": 993194,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345942241183.488, "dur": 2.010, + "args": { + "External id": 993195,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942241184.657, "dur": 0.751, + "args": { + "External id": 993196,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345942241186.911, "dur": 18.133, + "args": { + "External id": 993197,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942241189.522, "dur": 0.692, + "args": { + "External id": 993198,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345942241300.101, "dur": 36.721, + "args": { + "External id": 993199,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345942241355.518, "dur": 19.958, + "args": { + "External id": 993200,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942241385.426, "dur": 61.977, + "args": { + "External id": 993201,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942241456.107, "dur": 49.948, + "args": { + "External id": 993202,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942241516.337, "dur": 26.355, + "args": { + "External id": 993203,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942241552.796, "dur": 36.582, + "args": { + "External id": 993204,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942241597.952, "dur": 34.773, + "args": { + "External id": 993205,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 11188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942241640.940, "dur": 35.779, + "args": { + "External id": 993206,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345942241697.434, "dur": 28.390, + "args": { + "External id": 993207,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 11190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345942241742.320, "dur": 28.295, + "args": { + "External id": 993208,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345942241785.255, "dur": 21.257, + "args": { + "External id": 993209,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 11192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345942241821.792, "dur": 18.074, + "args": { + "External id": 993210,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 11193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345942241852.815, "dur": 18.491, + "args": { + "External id": 993211,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 11194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942241958.878, "dur": 17.622, + "args": { + "External id": 993212,"Record function id": 0, "Ev Idx": 11195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942241963.190, "dur": 12.202, + "args": { + "External id": 993213,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942241968.100, "dur": 6.129, + "args": { + "External id": 993214,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942241969.981, "dur": 4.111, + "args": { + "External id": 993215,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942241981.109, "dur": 6.585, + "args": { + "External id": 993216,"Record function id": 0, "Ev Idx": 11199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942241982.933, "dur": 4.035, + "args": { + "External id": 993217,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942241984.029, "dur": 2.331, + "args": { + "External id": 993218,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942241985.011, "dur": 1.255, + "args": { + "External id": 993219,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942241991.895, "dur": 5.121, + "args": { + "External id": 993220,"Record function id": 0, "Ev Idx": 11203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942241993.368, "dur": 3.188, + "args": { + "External id": 993221,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942241994.268, "dur": 1.778, + "args": { + "External id": 993222,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942241995.156, "dur": 0.813, + "args": { + "External id": 993223,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942242000.836, "dur": 5.468, + "args": { + "External id": 993224,"Record function id": 0, "Ev Idx": 11207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942242002.454, "dur": 3.332, + "args": { + "External id": 993225,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942242003.526, "dur": 1.788, + "args": { + "External id": 993226,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942242004.335, "dur": 0.871, + "args": { + "External id": 993227,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942242032.686, "dur": 8.001, + "args": { + "External id": 993228,"Record function id": 0, "Ev Idx": 11211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942242034.780, "dur": 5.166, + "args": { + "External id": 993229,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942242035.865, "dur": 3.075, + "args": { + "External id": 993230,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942242036.966, "dur": 1.754, + "args": { + "External id": 993231,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942242044.552, "dur": 6.874, + "args": { + "External id": 993232,"Record function id": 0, "Ev Idx": 11215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942242046.174, "dur": 4.797, + "args": { + "External id": 993233,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942242046.943, "dur": 3.583, + "args": { + "External id": 993234,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942242047.649, "dur": 2.790, + "args": { + "External id": 993235,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942242101.558, "dur": 8.176, + "args": { + "External id": 993236,"Record function id": 0, "Ev Idx": 11219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942242103.844, "dur": 5.156, + "args": { + "External id": 993237,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942242105.396, "dur": 2.594, + "args": { + "External id": 993238,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942242106.220, "dur": 1.579, + "args": { + "External id": 993239,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942242113.642, "dur": 4.987, + "args": { + "External id": 993240,"Record function id": 0, "Ev Idx": 11223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942242115.204, "dur": 2.927, + "args": { + "External id": 993241,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942242115.947, "dur": 1.627, + "args": { + "External id": 993242,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942242116.580, "dur": 0.879, + "args": { + "External id": 993243,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942242122.387, "dur": 5.119, + "args": { + "External id": 993244,"Record function id": 0, "Ev Idx": 11227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942242123.929, "dur": 3.091, + "args": { + "External id": 993245,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942242124.859, "dur": 1.684, + "args": { + "External id": 993246,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942242125.448, "dur": 1.010, + "args": { + "External id": 993247,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345942242131.926, "dur": 60322.928, + "args": { + "External id": 993248,"Record function id": 0, "Sequence number": 10552478, "Fwd thread id": 1, "Ev Idx": 11231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345942242133.582, "dur": 60307.806, + "args": { + "External id": 993249,"Sequence number": 10552478, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11232 + } + }, + { + "ph": "f", "id": 441, "pid": 2338710, "tid": 2379450, "ts": 6345942242133.582, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.3)", "pid": 2338710, "tid": 2379450, + "ts": 6345942242170.753, "dur": 46.541, + "args": { + "External id": 993250,"Record function id": 0, "Ev Idx": 11233 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.3)", "pid": 2338710, "tid": 2379450, + "ts": 6345942242226.292, "dur": 73.584, + "args": { + "External id": 993251,"Record function id": 0, "Ev Idx": 11234 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.3)", "pid": 2338710, "tid": 2379450, + "ts": 6345942242306.906, "dur": 60123.455, + "args": { + "External id": 993252,"Record function id": 0, "Ev Idx": 11235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942242412.325, "dur": 8.601, + "args": { + "External id": 993253,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942242435.669, "dur": 5.713, + "args": { + "External id": 993254,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 11237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345942242458.334, "dur": 58705.763, + "args": { + "External id": 993255,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345942242474.261, "dur": 58673.355, + "args": { + "External id": 993256,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942242598.870, "dur": 34.808, + "args": { + "External id": 993257,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345942242662.068, "dur": 58429.620, + "args": { + "External id": 993258,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 11241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345942242665.393, "dur": 58424.504, + "args": { + "External id": 993259,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 11242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942242671.556, "dur": 12.359, + "args": { + "External id": 993260,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345942242689.991, "dur": 58397.220, + "args": { + "External id": 993261,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 11244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942301305.107, "dur": 16.792, + "args": { + "External id": 993262,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 11245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942301310.198, "dur": 11.003, + "args": { + "External id": 993263,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345942301365.200, "dur": 475.216, + "args": { + "External id": 993264,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 11247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345942301406.015, "dur": 427.129, + "args": { + "External id": 993265,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11248, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345942301422.203, "dur": 403.295, + "args": { + "External id": 993266,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 11249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345942301862.840, "dur": 2.356, + "args": { + "External id": 993267,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11250, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942301934.244, "dur": 8.652, + "args": { + "External id": 993268,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942301960.824, "dur": 44.406, + "args": { + "External id": 993269,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942302042.087, "dur": 4.060, + "args": { + "External id": 993270,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942302091.123, "dur": 23.323, + "args": { + "External id": 993271,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942302125.030, "dur": 5.453, + "args": { + "External id": 993272,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942302136.673, "dur": 17.311, + "args": { + "External id": 993273,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942302169.090, "dur": 1.160, + "args": { + "External id": 993274,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942302175.218, "dur": 13.996, + "args": { + "External id": 993275,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942302194.749, "dur": 1.028, + "args": { + "External id": 993276,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942302201.577, "dur": 15.056, + "args": { + "External id": 993277,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942302221.630, "dur": 1.401, + "args": { + "External id": 993278,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942302227.444, "dur": 14.406, + "args": { + "External id": 993279,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942302248.760, "dur": 1.098, + "args": { + "External id": 993280,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942302278.424, "dur": 17.270, + "args": { + "External id": 993281,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942302309.104, "dur": 0.989, + "args": { + "External id": 993282,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942302315.955, "dur": 14.209, + "args": { + "External id": 993283,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942302334.954, "dur": 0.945, + "args": { + "External id": 993284,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942302339.820, "dur": 13.180, + "args": { + "External id": 993285,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 11268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345942302475.618, "dur": 3435.341, + "args": { + "External id": 993286,"Record function id": 0, "Ev Idx": 11269 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.2)", "pid": 2338710, "tid": 2379450, + "ts": 6345942302500.616, "dur": 1296.939, + "args": { + "External id": 993287,"Record function id": 0, "Ev Idx": 11270 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.2)", "pid": 2338710, "tid": 2379450, + "ts": 6345942302521.898, "dur": 384.526, + "args": { + "External id": 993288,"Record function id": 0, "Ev Idx": 11271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942302626.967, "dur": 6.320, + "args": { + "External id": 993289,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 11272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942302636.778, "dur": 3.582, + "args": { + "External id": 993290,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942302642.606, "dur": 0.894, + "args": { + "External id": 993291,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 11274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942302645.460, "dur": 1.112, + "args": { + "External id": 993292,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 11275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942302650.180, "dur": 0.815, + "args": { + "External id": 993293,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 11276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942302652.808, "dur": 1.032, + "args": { + "External id": 993294,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 11277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942302655.374, "dur": 1.292, + "args": { + "External id": 993295,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942302658.384, "dur": 2.121, + "args": { + "External id": 993296,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942302664.130, "dur": 0.615, + "args": { + "External id": 993297,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942302666.491, "dur": 3.470, + "args": { + "External id": 993298,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345942302691.079, "dur": 176.823, + "args": { + "External id": 993299,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 11282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345942302713.162, "dur": 149.468, + "args": { + "External id": 993300,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 11283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942302730.527, "dur": 16.718, + "args": { + "External id": 993301,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345942302752.497, "dur": 77.158, + "args": { + "External id": 993302,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 11285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345942302756.225, "dur": 73.061, + "args": { + "External id": 993303,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 11286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942302760.865, "dur": 5.856, + "args": { + "External id": 993304,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345942302770.285, "dur": 58.134, + "args": { + "External id": 993305,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 11288 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.1", "pid": 2338710, "tid": 2379450, + "ts": 6345942303000.553, "dur": 787.792, + "args": { + "External id": 993306,"Record function id": 0, "Ev Idx": 11289 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.1)", "pid": 2338710, "tid": 2379450, + "ts": 6345942303044.524, "dur": 728.737, + "args": { + "External id": 993307,"Record function id": 0, "Ev Idx": 11290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942303182.199, "dur": 10.223, + "args": { + "External id": 993308,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345942303212.043, "dur": 34.595, + "args": { + "External id": 993309,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 11292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942303218.076, "dur": 2.284, + "args": { + "External id": 993310,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942303223.124, "dur": 0.396, + "args": { + "External id": 993311,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942303225.478, "dur": 0.742, + "args": { + "External id": 993312,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942303227.604, "dur": 0.561, + "args": { + "External id": 993313,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942303229.977, "dur": 3.109, + "args": { + "External id": 993314,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942303234.792, "dur": 0.728, + "args": { + "External id": 993315,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942303237.144, "dur": 0.751, + "args": { + "External id": 993316,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942303239.222, "dur": 0.529, + "args": { + "External id": 993317,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942303241.170, "dur": 0.630, + "args": { + "External id": 993318,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345942303259.525, "dur": 64.933, + "args": { + "External id": 993319,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 11302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345942303367.696, "dur": 130.222, + "args": { + "External id": 993320,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 11303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942303381.173, "dur": 3.965, + "args": { + "External id": 993321,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345942303391.320, "dur": 12.520, + "args": { + "External id": 993322,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 11305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345942303396.410, "dur": 6.958, + "args": { + "External id": 993323,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 11306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942303400.958, "dur": 1.002, + "args": { + "External id": 993324,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 11307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345942303411.681, "dur": 28.214, + "args": { + "External id": 993325,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 11308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942303414.413, "dur": 0.517, + "args": { + "External id": 993326,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942303416.552, "dur": 3.520, + "args": { + "External id": 993327,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942303421.761, "dur": 0.600, + "args": { + "External id": 993328,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942303424.045, "dur": 0.528, + "args": { + "External id": 993329,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942303425.863, "dur": 0.717, + "args": { + "External id": 993330,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942303428.469, "dur": 0.300, + "args": { + "External id": 993331,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942303430.901, "dur": 0.533, + "args": { + "External id": 993332,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942303432.554, "dur": 0.421, + "args": { + "External id": 993333,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942303434.587, "dur": 0.383, + "args": { + "External id": 993334,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345942303451.050, "dur": 36.067, + "args": { + "External id": 993335,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 11318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345942303555.344, "dur": 139.143, + "args": { + "External id": 993336,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 11319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345942303586.504, "dur": 104.029, + "args": { + "External id": 993337,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11320, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345942303598.881, "dur": 86.420, + "args": { + "External id": 993338,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 11321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345942303712.363, "dur": 2.322, + "args": { + "External id": 993339,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11322, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345942303806.874, "dur": 2079.198, + "args": { + "External id": 993340,"Sequence number": 10552477, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11323 + } + }, + { + "ph": "f", "id": 442, "pid": 2338710, "tid": 2379450, "ts": 6345942303806.874, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942303933.140, "dur": 195.440, + "args": { + "External id": 993341,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 11324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345942304185.650, "dur": 50.111, + "args": { + "External id": 993342,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 11325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345942304258.701, "dur": 69.455, + "args": { + "External id": 993343,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 11326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942304339.650, "dur": 38.460, + "args": { + "External id": 993344,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 11327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942304385.970, "dur": 37.740, + "args": { + "External id": 993345,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 11328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942304431.208, "dur": 32.719, + "args": { + "External id": 993346,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 11329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942304477.103, "dur": 33.804, + "args": { + "External id": 993347,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 11330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345942304543.535, "dur": 29.175, + "args": { + "External id": 993348,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 11331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345942304594.660, "dur": 34.649, + "args": { + "External id": 993349,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345942304655.411, "dur": 24.450, + "args": { + "External id": 993350,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 11333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345942304694.481, "dur": 18.892, + "args": { + "External id": 993351,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 11334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942304725.548, "dur": 47.823, + "args": { + "External id": 993352,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 11335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942304778.052, "dur": 38.932, + "args": { + "External id": 993353,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345942304864.640, "dur": 374.987, + "args": { + "External id": 993354,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 11337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942304960.650, "dur": 12.405, + "args": { + "External id": 993355,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942304975.490, "dur": 3.279, + "args": { + "External id": 993356,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942304980.361, "dur": 2.438, + "args": { + "External id": 993357,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942304984.217, "dur": 4.301, + "args": { + "External id": 993358,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345942305112.271, "dur": 6.562, + "args": { + "External id": 993359,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942305114.591, "dur": 3.742, + "args": { + "External id": 993360,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345942305121.015, "dur": 40.573, + "args": { + "External id": 993361,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942305128.084, "dur": 2.221, + "args": { + "External id": 993362,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345942305163.528, "dur": 2.371, + "args": { + "External id": 993363,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942305165.073, "dur": 0.738, + "args": { + "External id": 993364,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345942305167.544, "dur": 19.058, + "args": { + "External id": 993365,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942305169.866, "dur": 0.652, + "args": { + "External id": 993366,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345942305283.024, "dur": 35.238, + "args": { + "External id": 993367,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345942305337.883, "dur": 21.061, + "args": { + "External id": 993368,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942305369.592, "dur": 60.321, + "args": { + "External id": 993369,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942305437.953, "dur": 49.037, + "args": { + "External id": 993370,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942305496.671, "dur": 26.457, + "args": { + "External id": 993371,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942305532.613, "dur": 36.794, + "args": { + "External id": 993372,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942305576.991, "dur": 33.238, + "args": { + "External id": 993373,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 11356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942305618.267, "dur": 34.516, + "args": { + "External id": 993374,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345942305674.791, "dur": 26.879, + "args": { + "External id": 993375,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 11358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345942305719.365, "dur": 26.643, + "args": { + "External id": 993376,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345942305762.290, "dur": 19.845, + "args": { + "External id": 993377,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 11360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345942305797.688, "dur": 14.680, + "args": { + "External id": 993378,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 11361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345942305825.636, "dur": 23.103, + "args": { + "External id": 993379,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 11362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942305937.004, "dur": 18.278, + "args": { + "External id": 993380,"Record function id": 0, "Ev Idx": 11363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942305941.476, "dur": 12.552, + "args": { + "External id": 993381,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942305946.402, "dur": 6.580, + "args": { + "External id": 993382,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942305948.542, "dur": 4.306, + "args": { + "External id": 993383,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942305959.819, "dur": 6.681, + "args": { + "External id": 993384,"Record function id": 0, "Ev Idx": 11367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942305961.919, "dur": 4.023, + "args": { + "External id": 993385,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942305962.924, "dur": 2.483, + "args": { + "External id": 993386,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942305963.916, "dur": 1.387, + "args": { + "External id": 993387,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942305970.662, "dur": 5.780, + "args": { + "External id": 993388,"Record function id": 0, "Ev Idx": 11371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942305972.173, "dur": 3.755, + "args": { + "External id": 993389,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942305972.866, "dur": 2.558, + "args": { + "External id": 993390,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942305973.864, "dur": 1.479, + "args": { + "External id": 993391,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942305980.248, "dur": 5.383, + "args": { + "External id": 993392,"Record function id": 0, "Ev Idx": 11375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942305981.868, "dur": 3.240, + "args": { + "External id": 993393,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942305982.665, "dur": 1.953, + "args": { + "External id": 993394,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942305983.644, "dur": 0.894, + "args": { + "External id": 993395,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942306001.037, "dur": 4.632, + "args": { + "External id": 993396,"Record function id": 0, "Ev Idx": 11379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942306002.242, "dur": 2.896, + "args": { + "External id": 993397,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942306002.941, "dur": 1.509, + "args": { + "External id": 993398,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942306003.315, "dur": 1.051, + "args": { + "External id": 993399,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942306031.692, "dur": 11.064, + "args": { + "External id": 993400,"Record function id": 0, "Ev Idx": 11383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942306034.288, "dur": 7.642, + "args": { + "External id": 993401,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942306035.565, "dur": 5.143, + "args": { + "External id": 993402,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942306036.251, "dur": 4.221, + "args": { + "External id": 993403,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942306046.975, "dur": 50.147, + "args": { + "External id": 993404,"Record function id": 0, "Ev Idx": 11387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942306048.543, "dur": 47.155, + "args": { + "External id": 993405,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942306049.142, "dur": 43.947, + "args": { + "External id": 993406,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942306090.385, "dur": 2.077, + "args": { + "External id": 993407,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942306104.097, "dur": 11.898, + "args": { + "External id": 993408,"Record function id": 0, "Ev Idx": 11391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942306106.000, "dur": 9.476, + "args": { + "External id": 993409,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942306107.270, "dur": 7.667, + "args": { + "External id": 993410,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942306113.815, "dur": 0.999, + "args": { + "External id": 993411,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942306120.110, "dur": 4.395, + "args": { + "External id": 993412,"Record function id": 0, "Ev Idx": 11395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942306121.335, "dur": 2.695, + "args": { + "External id": 993413,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942306121.969, "dur": 1.526, + "args": { + "External id": 993414,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942306122.558, "dur": 0.836, + "args": { + "External id": 993415,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345942306129.457, "dur": 61646.590, + "args": { + "External id": 993416,"Record function id": 0, "Sequence number": 10552476, "Fwd thread id": 1, "Ev Idx": 11399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345942306131.203, "dur": 61633.394, + "args": { + "External id": 993417,"Sequence number": 10552476, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11400 + } + }, + { + "ph": "f", "id": 443, "pid": 2338710, "tid": 2379450, "ts": 6345942306131.203, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.2)", "pid": 2338710, "tid": 2379450, + "ts": 6345942306169.072, "dur": 46.879, + "args": { + "External id": 993418,"Record function id": 0, "Ev Idx": 11401 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.2)", "pid": 2338710, "tid": 2379450, + "ts": 6345942306225.518, "dur": 76.475, + "args": { + "External id": 993419,"Record function id": 0, "Ev Idx": 11402 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.2)", "pid": 2338710, "tid": 2379450, + "ts": 6345942306308.756, "dur": 61446.137, + "args": { + "External id": 993420,"Record function id": 0, "Ev Idx": 11403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942306417.386, "dur": 8.373, + "args": { + "External id": 993421,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942306437.225, "dur": 6.301, + "args": { + "External id": 993422,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 11405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345942306459.696, "dur": 60118.945, + "args": { + "External id": 993423,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345942306476.536, "dur": 60085.645, + "args": { + "External id": 993424,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942306633.123, "dur": 24.901, + "args": { + "External id": 993425,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345942306686.850, "dur": 59825.317, + "args": { + "External id": 993426,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 11409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345942306691.062, "dur": 59819.241, + "args": { + "External id": 993427,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 11410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942306696.362, "dur": 10.777, + "args": { + "External id": 993428,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345942306709.437, "dur": 59797.109, + "args": { + "External id": 993429,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 11412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942366720.005, "dur": 16.418, + "args": { + "External id": 993430,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 11413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942366724.674, "dur": 11.259, + "args": { + "External id": 993431,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345942366777.315, "dur": 489.615, + "args": { + "External id": 993432,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 11415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345942366816.036, "dur": 443.126, + "args": { + "External id": 993433,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11416, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345942366829.550, "dur": 419.750, + "args": { + "External id": 993434,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 11417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345942367295.060, "dur": 3.302, + "args": { + "External id": 993435,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11418, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942367373.963, "dur": 9.239, + "args": { + "External id": 993436,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942367402.079, "dur": 42.495, + "args": { + "External id": 993437,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942367456.459, "dur": 4.775, + "args": { + "External id": 993438,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942367468.135, "dur": 15.607, + "args": { + "External id": 993439,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942367494.636, "dur": 1.114, + "args": { + "External id": 993440,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942367502.122, "dur": 14.933, + "args": { + "External id": 993441,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942367523.216, "dur": 1.070, + "args": { + "External id": 993442,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942367530.684, "dur": 13.880, + "args": { + "External id": 993443,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942367554.446, "dur": 1.432, + "args": { + "External id": 993444,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942367561.601, "dur": 14.752, + "args": { + "External id": 993445,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942367581.102, "dur": 1.350, + "args": { + "External id": 993446,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942367587.325, "dur": 13.966, + "args": { + "External id": 993447,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942367607.907, "dur": 1.079, + "args": { + "External id": 993448,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942367614.590, "dur": 14.664, + "args": { + "External id": 993449,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942367633.864, "dur": 1.073, + "args": { + "External id": 993450,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942367639.508, "dur": 16.095, + "args": { + "External id": 993451,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942367660.558, "dur": 1.066, + "args": { + "External id": 993452,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942367666.290, "dur": 14.276, + "args": { + "External id": 993453,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 11436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345942367795.033, "dur": 3449.755, + "args": { + "External id": 993454,"Record function id": 0, "Ev Idx": 11437 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.1)", "pid": 2338710, "tid": 2379450, + "ts": 6345942367819.350, "dur": 1331.908, + "args": { + "External id": 993455,"Record function id": 0, "Ev Idx": 11438 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.1)", "pid": 2338710, "tid": 2379450, + "ts": 6345942367837.804, "dur": 453.158, + "args": { + "External id": 993456,"Record function id": 0, "Ev Idx": 11439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942367932.781, "dur": 7.157, + "args": { + "External id": 993457,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 11440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942367943.838, "dur": 1.200, + "args": { + "External id": 993458,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942367947.425, "dur": 1.206, + "args": { + "External id": 993459,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 11442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942367951.029, "dur": 1.064, + "args": { + "External id": 993460,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 11443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942367956.118, "dur": 0.818, + "args": { + "External id": 993461,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 11444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942367958.975, "dur": 1.099, + "args": { + "External id": 993462,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 11445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942367961.711, "dur": 1.477, + "args": { + "External id": 993463,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942367964.736, "dur": 1.822, + "args": { + "External id": 993464,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942367969.954, "dur": 3.311, + "args": { + "External id": 993465,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942367974.951, "dur": 0.855, + "args": { + "External id": 993466,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345942368005.448, "dur": 243.131, + "args": { + "External id": 993467,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 11450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345942368047.771, "dur": 194.175, + "args": { + "External id": 993468,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 11451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942368103.571, "dur": 19.194, + "args": { + "External id": 993469,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345942368129.042, "dur": 80.243, + "args": { + "External id": 993470,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 11453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345942368132.396, "dur": 76.492, + "args": { + "External id": 993471,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 11454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942368137.130, "dur": 7.050, + "args": { + "External id": 993472,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345942368146.203, "dur": 62.015, + "args": { + "External id": 993473,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 11456 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.0", "pid": 2338710, "tid": 2379450, + "ts": 6345942368398.331, "dur": 743.624, + "args": { + "External id": 993474,"Record function id": 0, "Ev Idx": 11457 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.0)", "pid": 2338710, "tid": 2379450, + "ts": 6345942368419.364, "dur": 706.040, + "args": { + "External id": 993475,"Record function id": 0, "Ev Idx": 11458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942368494.766, "dur": 7.245, + "args": { + "External id": 993476,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345942368519.439, "dur": 33.057, + "args": { + "External id": 993477,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 11460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942368525.956, "dur": 2.102, + "args": { + "External id": 993478,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942368529.946, "dur": 0.820, + "args": { + "External id": 993479,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942368532.946, "dur": 0.405, + "args": { + "External id": 993480,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942368534.949, "dur": 2.977, + "args": { + "External id": 993481,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942368539.353, "dur": 0.648, + "args": { + "External id": 993482,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942368541.905, "dur": 0.396, + "args": { + "External id": 993483,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942368544.369, "dur": 0.389, + "args": { + "External id": 993484,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942368545.736, "dur": 0.456, + "args": { + "External id": 993485,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942368548.206, "dur": 0.371, + "args": { + "External id": 993486,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345942368565.392, "dur": 48.937, + "args": { + "External id": 993487,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 11470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2379450, + "ts": 6345942368650.953, "dur": 130.116, + "args": { + "External id": 993488,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 11471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942368662.897, "dur": 4.079, + "args": { + "External id": 993489,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2379450, + "ts": 6345942368672.978, "dur": 12.111, + "args": { + "External id": 993490,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 11473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2379450, + "ts": 6345942368678.206, "dur": 6.396, + "args": { + "External id": 993491,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 11474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942368682.146, "dur": 1.064, + "args": { + "External id": 993492,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 11475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2379450, + "ts": 6345942368692.500, "dur": 31.837, + "args": { + "External id": 993493,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 11476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942368694.518, "dur": 2.866, + "args": { + "External id": 993494,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942368699.225, "dur": 0.485, + "args": { + "External id": 993495,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942368702.033, "dur": 0.787, + "args": { + "External id": 993496,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942368703.970, "dur": 0.857, + "args": { + "External id": 993497,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942368707.141, "dur": 0.424, + "args": { + "External id": 993498,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942368709.196, "dur": 0.601, + "args": { + "External id": 993499,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942368711.170, "dur": 0.546, + "args": { + "External id": 993500,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942368713.649, "dur": 0.496, + "args": { + "External id": 993501,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942368715.962, "dur": 2.634, + "args": { + "External id": 993502,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345942368739.522, "dur": 33.409, + "args": { + "External id": 993503,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 11486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345942368831.941, "dur": 140.116, + "args": { + "External id": 993504,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 11487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345942368863.507, "dur": 104.487, + "args": { + "External id": 993505,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11488, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2379450, + "ts": 6345942368875.021, "dur": 88.158, + "args": { + "External id": 993506,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 11489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345942368991.489, "dur": 1.929, + "args": { + "External id": 993507,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11490, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345942369161.153, "dur": 2059.069, + "args": { + "External id": 993508,"Sequence number": 10552475, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11491 + } + }, + { + "ph": "f", "id": 444, "pid": 2338710, "tid": 2379450, "ts": 6345942369161.153, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942369299.927, "dur": 136.983, + "args": { + "External id": 993509,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 11492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345942369488.049, "dur": 47.147, + "args": { + "External id": 993510,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 11493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345942369557.573, "dur": 59.178, + "args": { + "External id": 993511,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 11494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942369628.022, "dur": 36.301, + "args": { + "External id": 993512,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 11495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942369671.373, "dur": 37.726, + "args": { + "External id": 993513,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 11496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942369716.415, "dur": 32.308, + "args": { + "External id": 993514,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 11497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942369759.026, "dur": 34.541, + "args": { + "External id": 993515,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 11498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345942369823.542, "dur": 29.509, + "args": { + "External id": 993516,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 11499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345942369873.399, "dur": 34.314, + "args": { + "External id": 993517,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345942369932.595, "dur": 23.893, + "args": { + "External id": 993518,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 11501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345942369971.497, "dur": 18.343, + "args": { + "External id": 993519,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 11502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942370001.955, "dur": 114.196, + "args": { + "External id": 993520,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 11503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942370123.531, "dur": 44.616, + "args": { + "External id": 993521,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345942370219.615, "dur": 294.450, + "args": { + "External id": 993522,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 11505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942370315.791, "dur": 9.034, + "args": { + "External id": 993523,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942370327.545, "dur": 3.246, + "args": { + "External id": 993524,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942370332.149, "dur": 2.338, + "args": { + "External id": 993525,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942370335.622, "dur": 4.514, + "args": { + "External id": 993526,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345942370398.062, "dur": 6.208, + "args": { + "External id": 993527,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942370400.258, "dur": 3.751, + "args": { + "External id": 993528,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345942370406.563, "dur": 36.440, + "args": { + "External id": 993529,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942370413.692, "dur": 2.055, + "args": { + "External id": 993530,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345942370444.593, "dur": 2.513, + "args": { + "External id": 993531,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942370446.340, "dur": 0.607, + "args": { + "External id": 993532,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345942370448.370, "dur": 16.913, + "args": { + "External id": 993533,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942370451.031, "dur": 0.660, + "args": { + "External id": 993534,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345942370554.375, "dur": 34.397, + "args": { + "External id": 993535,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345942370606.985, "dur": 18.124, + "args": { + "External id": 993536,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942370634.176, "dur": 52.795, + "args": { + "External id": 993537,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942370695.029, "dur": 46.150, + "args": { + "External id": 993538,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942370752.555, "dur": 24.745, + "args": { + "External id": 993539,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942370784.667, "dur": 35.590, + "args": { + "External id": 993540,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942370828.946, "dur": 33.414, + "args": { + "External id": 993541,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 11524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942370870.086, "dur": 35.083, + "args": { + "External id": 993542,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345942370924.992, "dur": 27.450, + "args": { + "External id": 993543,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 11526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345942370969.708, "dur": 27.780, + "args": { + "External id": 993544,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345942371035.704, "dur": 62.302, + "args": { + "External id": 993545,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 11528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345942371120.532, "dur": 20.555, + "args": { + "External id": 993546,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 11529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345942371156.633, "dur": 23.795, + "args": { + "External id": 993547,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 11530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942371272.753, "dur": 18.603, + "args": { + "External id": 993548,"Record function id": 0, "Ev Idx": 11531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942371277.043, "dur": 13.016, + "args": { + "External id": 993549,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942371282.126, "dur": 6.872, + "args": { + "External id": 993550,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942371284.108, "dur": 4.752, + "args": { + "External id": 993551,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942371296.065, "dur": 7.252, + "args": { + "External id": 993552,"Record function id": 0, "Ev Idx": 11535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942371297.883, "dur": 4.812, + "args": { + "External id": 993553,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942371298.692, "dur": 3.268, + "args": { + "External id": 993554,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942371300.417, "dur": 1.387, + "args": { + "External id": 993555,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942371307.186, "dur": 5.053, + "args": { + "External id": 993556,"Record function id": 0, "Ev Idx": 11539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942371308.658, "dur": 3.053, + "args": { + "External id": 993557,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942371309.301, "dur": 1.898, + "args": { + "External id": 993558,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942371310.196, "dur": 0.902, + "args": { + "External id": 993559,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942371315.985, "dur": 4.538, + "args": { + "External id": 993560,"Record function id": 0, "Ev Idx": 11543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942371317.027, "dur": 2.966, + "args": { + "External id": 993561,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942371317.908, "dur": 1.587, + "args": { + "External id": 993562,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942371318.373, "dur": 1.014, + "args": { + "External id": 993563,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942371324.132, "dur": 4.592, + "args": { + "External id": 993564,"Record function id": 0, "Ev Idx": 11547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942371325.634, "dur": 2.581, + "args": { + "External id": 993565,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942371326.294, "dur": 1.389, + "args": { + "External id": 993566,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942371326.797, "dur": 0.811, + "args": { + "External id": 993567,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942371332.437, "dur": 6.262, + "args": { + "External id": 993568,"Record function id": 0, "Ev Idx": 11551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942371333.792, "dur": 4.409, + "args": { + "External id": 993569,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942371334.522, "dur": 3.088, + "args": { + "External id": 993570,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942371334.837, "dur": 2.663, + "args": { + "External id": 993571,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942371342.466, "dur": 5.429, + "args": { + "External id": 993572,"Record function id": 0, "Ev Idx": 11555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942371344.382, "dur": 2.990, + "args": { + "External id": 993573,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942371345.358, "dur": 1.537, + "args": { + "External id": 993574,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942371345.899, "dur": 0.919, + "args": { + "External id": 993575,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942371351.761, "dur": 5.227, + "args": { + "External id": 993576,"Record function id": 0, "Ev Idx": 11559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942371353.276, "dur": 3.207, + "args": { + "External id": 993577,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942371354.068, "dur": 1.875, + "args": { + "External id": 993578,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942371354.790, "dur": 1.007, + "args": { + "External id": 993579,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942371360.925, "dur": 4.677, + "args": { + "External id": 993580,"Record function id": 0, "Ev Idx": 11563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942371362.433, "dur": 2.675, + "args": { + "External id": 993581,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942371363.034, "dur": 1.421, + "args": { + "External id": 993582,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942371363.709, "dur": 0.671, + "args": { + "External id": 993583,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345942371370.283, "dur": 60565.304, + "args": { + "External id": 993584,"Record function id": 0, "Sequence number": 10552474, "Fwd thread id": 1, "Ev Idx": 11567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345942371371.806, "dur": 60552.562, + "args": { + "External id": 993585,"Sequence number": 10552474, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11568 + } + }, + { + "ph": "f", "id": 445, "pid": 2338710, "tid": 2379450, "ts": 6345942371371.806, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.1)", "pid": 2338710, "tid": 2379450, + "ts": 6345942371406.556, "dur": 42.136, + "args": { + "External id": 993586,"Record function id": 0, "Ev Idx": 11569 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.1)", "pid": 2338710, "tid": 2379450, + "ts": 6345942371456.982, "dur": 75.705, + "args": { + "External id": 993587,"Record function id": 0, "Ev Idx": 11570 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.1)", "pid": 2338710, "tid": 2379450, + "ts": 6345942371539.446, "dur": 60374.983, + "args": { + "External id": 993588,"Record function id": 0, "Ev Idx": 11571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942371642.530, "dur": 8.315, + "args": { + "External id": 993589,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942371662.408, "dur": 5.491, + "args": { + "External id": 993590,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 11573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345942371683.941, "dur": 58998.899, + "args": { + "External id": 993591,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345942371701.270, "dur": 58964.371, + "args": { + "External id": 993592,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942371819.909, "dur": 22.580, + "args": { + "External id": 993593,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345942371866.864, "dur": 58735.921, + "args": { + "External id": 993594,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 11577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345942371871.401, "dur": 58730.226, + "args": { + "External id": 993595,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 11578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942371889.714, "dur": 12.917, + "args": { + "External id": 993596,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345942371905.875, "dur": 58689.817, + "args": { + "External id": 993597,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 11580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942430821.948, "dur": 16.602, + "args": { + "External id": 993598,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 11581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942430827.025, "dur": 10.980, + "args": { + "External id": 993599,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345942430878.983, "dur": 542.804, + "args": { + "External id": 993600,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 11583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345942430913.440, "dur": 500.627, + "args": { + "External id": 993601,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11584, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345942430928.024, "dur": 477.316, + "args": { + "External id": 993602,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 11585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345942431451.642, "dur": 2.403, + "args": { + "External id": 993603,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11586, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942431531.691, "dur": 9.054, + "args": { + "External id": 993604,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942431557.941, "dur": 41.768, + "args": { + "External id": 993605,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942431612.417, "dur": 2.986, + "args": { + "External id": 993606,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942431628.864, "dur": 17.341, + "args": { + "External id": 993607,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942431652.627, "dur": 1.266, + "args": { + "External id": 993608,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942431659.827, "dur": 15.872, + "args": { + "External id": 993609,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942431683.051, "dur": 1.241, + "args": { + "External id": 993610,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942431689.880, "dur": 14.859, + "args": { + "External id": 993611,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942431713.112, "dur": 4.303, + "args": { + "External id": 993612,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942431721.896, "dur": 14.805, + "args": { + "External id": 993613,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942431741.476, "dur": 1.306, + "args": { + "External id": 993614,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942431747.365, "dur": 13.809, + "args": { + "External id": 993615,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942431771.606, "dur": 1.153, + "args": { + "External id": 993616,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942431778.632, "dur": 14.558, + "args": { + "External id": 993617,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942431798.207, "dur": 1.130, + "args": { + "External id": 993618,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942431803.641, "dur": 14.259, + "args": { + "External id": 993619,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942431822.649, "dur": 1.109, + "args": { + "External id": 993620,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942431827.645, "dur": 14.332, + "args": { + "External id": 993621,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 11604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345942431955.127, "dur": 2748.044, + "args": { + "External id": 993622,"Record function id": 0, "Ev Idx": 11605 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.0)", "pid": 2338710, "tid": 2379450, + "ts": 6345942431978.514, "dur": 587.912, + "args": { + "External id": 993623,"Record function id": 0, "Ev Idx": 11606 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.0)", "pid": 2338710, "tid": 2379450, + "ts": 6345942431996.536, "dur": 458.366, + "args": { + "External id": 993624,"Record function id": 0, "Ev Idx": 11607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942432166.850, "dur": 7.421, + "args": { + "External id": 993625,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 11608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942432178.366, "dur": 1.632, + "args": { + "External id": 993626,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942432182.312, "dur": 1.257, + "args": { + "External id": 993627,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 11610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942432185.505, "dur": 3.605, + "args": { + "External id": 993628,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 11611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942432190.756, "dur": 1.318, + "args": { + "External id": 993629,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 11612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942432195.727, "dur": 1.273, + "args": { + "External id": 993630,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 11613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942432198.665, "dur": 1.022, + "args": { + "External id": 993631,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942432201.115, "dur": 2.020, + "args": { + "External id": 993632,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942432205.401, "dur": 0.833, + "args": { + "External id": 993633,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942432210.147, "dur": 1.163, + "args": { + "External id": 993634,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345942432232.897, "dur": 184.693, + "args": { + "External id": 993635,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 11618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345942432254.683, "dur": 157.873, + "args": { + "External id": 993636,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 11619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942432272.361, "dur": 17.904, + "args": { + "External id": 993637,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345942432295.294, "dur": 85.796, + "args": { + "External id": 993638,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 11621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345942432298.501, "dur": 82.173, + "args": { + "External id": 993639,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 11622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942432303.101, "dur": 10.133, + "args": { + "External id": 993640,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345942432315.454, "dur": 64.503, + "args": { + "External id": 993641,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 11624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345942432575.348, "dur": 2096.025, + "args": { + "External id": 993642,"Sequence number": 10552473, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11625 + } + }, + { + "ph": "f", "id": 446, "pid": 2338710, "tid": 2379450, "ts": 6345942432575.348, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942432703.894, "dur": 128.222, + "args": { + "External id": 993643,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 11626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338710, "tid": 2379450, + "ts": 6345942432879.212, "dur": 48.683, + "args": { + "External id": 993644,"kernel_hash": "cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/j7/cj7a47fzenagkzhlhxrxdlqstq7bg7zqy4egmin3jy4cjzfa2fi2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 11627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338710, "tid": 2379450, + "ts": 6345942432951.741, "dur": 80.708, + "args": { + "External id": 993645,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 11628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942433090.380, "dur": 52.370, + "args": { + "External id": 993646,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 11629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942433154.952, "dur": 39.945, + "args": { + "External id": 993647,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 11630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942433204.573, "dur": 37.285, + "args": { + "External id": 993648,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 11631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942433253.226, "dur": 37.004, + "args": { + "External id": 993649,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 11632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338710, "tid": 2379450, + "ts": 6345942433320.485, "dur": 33.959, + "args": { + "External id": 993650,"kernel_hash": "ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/t6/ct6pdrwin6gpbbhb3ubmfazw3qma3gzsewwbv4culi3mwmzponsp.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 11633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338710, "tid": 2379450, + "ts": 6345942433374.662, "dur": 44.050, + "args": { + "External id": 993651,"kernel_hash": "cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/fh/cfha2kk6ylmk6fzcisr2hmw7gbvd62evmko6izxkbgo773kpqpv7.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345942433445.027, "dur": 22.610, + "args": { + "External id": 993652,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 11635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345942433484.402, "dur": 17.163, + "args": { + "External id": 993653,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 11636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942433514.339, "dur": 43.435, + "args": { + "External id": 993654,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 11637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942433561.880, "dur": 36.835, + "args": { + "External id": 993655,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338710, "tid": 2379450, + "ts": 6345942433632.131, "dur": 276.228, + "args": { + "External id": 993656,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 11639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942433721.601, "dur": 8.058, + "args": { + "External id": 993657,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942433731.934, "dur": 3.663, + "args": { + "External id": 993658,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942433737.415, "dur": 2.850, + "args": { + "External id": 993659,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942433741.428, "dur": 4.974, + "args": { + "External id": 993660,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345942433794.842, "dur": 5.635, + "args": { + "External id": 993661,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942433796.972, "dur": 3.349, + "args": { + "External id": 993662,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345942433802.591, "dur": 35.572, + "args": { + "External id": 993663,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942433809.317, "dur": 2.098, + "args": { + "External id": 993664,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2379450, + "ts": 6345942433839.864, "dur": 1.788, + "args": { + "External id": 993665,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942433840.977, "dur": 0.567, + "args": { + "External id": 993666,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2379450, + "ts": 6345942433842.902, "dur": 16.051, + "args": { + "External id": 993667,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942433845.677, "dur": 0.638, + "args": { + "External id": 993668,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2379450, + "ts": 6345942433947.300, "dur": 32.393, + "args": { + "External id": 993669,"kernel_hash": "cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/n3/cn3y2zipjjhfo3h2phytb3nb5ytxk4unc5shfjjsauycguastfpl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2379450, + "ts": 6345942433999.192, "dur": 41.192, + "args": { + "External id": 993670,"kernel_hash": "cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/c5/cc52kbqo4hhxyke6bamdvdcwi7wb5xue3fhqbjqsziegdnihxq3v.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942434091.227, "dur": 64.472, + "args": { + "External id": 993671,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942434166.388, "dur": 48.640, + "args": { + "External id": 993672,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942434229.574, "dur": 26.162, + "args": { + "External id": 993673,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942434263.808, "dur": 36.526, + "args": { + "External id": 993674,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942434324.334, "dur": 45.543, + "args": { + "External id": 993675,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 11658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2379450, + "ts": 6345942434380.821, "dur": 35.797, + "args": { + "External id": 993676,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338710, "tid": 2379450, + "ts": 6345942434445.061, "dur": 32.087, + "args": { + "External id": 993677,"kernel_hash": "cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qs/cqsmoyietljwwem74pqj7rw77h2baol67y2jdqdgbsq7tblfkrf4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 11660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338710, "tid": 2379450, + "ts": 6345942434499.211, "dur": 27.658, + "args": { + "External id": 993678,"kernel_hash": "cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uk/cukcqlutgfm5qgp3jgdu3zjttoeb2ky7bgr2io5daf72hfzazg44.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338710, "tid": 2379450, + "ts": 6345942434544.326, "dur": 19.911, + "args": { + "External id": 993679,"kernel_hash": "cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/yz/cyzcbvagv6sqe54tdiwtt2pobis2dvsgukvmbxe6inqo2qavtntb.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 11662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338710, "tid": 2379450, + "ts": 6345942434581.397, "dur": 16.189, + "args": { + "External id": 993680,"kernel_hash": "cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/v5/cv5chmvmr3gn5d7eebb6pg3x4tnjvbuhdglamj3f65njpp45ulv6.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 11663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338710, "tid": 2379450, + "ts": 6345942434612.717, "dur": 21.678, + "args": { + "External id": 993681,"kernel_hash": "cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/em/cemjdqi5ghk3i77b2rg37v3lyvjbugp7r7bobzjsarysugcnwrjj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 11664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942434729.086, "dur": 19.256, + "args": { + "External id": 993682,"Record function id": 0, "Ev Idx": 11665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942434733.788, "dur": 13.370, + "args": { + "External id": 993683,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942434739.137, "dur": 6.980, + "args": { + "External id": 993684,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942434741.231, "dur": 4.757, + "args": { + "External id": 993685,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942434752.863, "dur": 6.372, + "args": { + "External id": 993686,"Record function id": 0, "Ev Idx": 11669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942434754.523, "dur": 4.169, + "args": { + "External id": 993687,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942434755.319, "dur": 2.859, + "args": { + "External id": 993688,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942434756.576, "dur": 1.366, + "args": { + "External id": 993689,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942434763.193, "dur": 6.128, + "args": { + "External id": 993690,"Record function id": 0, "Ev Idx": 11673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942434765.402, "dur": 3.414, + "args": { + "External id": 993691,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942434766.283, "dur": 2.013, + "args": { + "External id": 993692,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942434767.063, "dur": 1.132, + "args": { + "External id": 993693,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942434773.005, "dur": 5.547, + "args": { + "External id": 993694,"Record function id": 0, "Ev Idx": 11677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942434774.671, "dur": 3.387, + "args": { + "External id": 993695,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942434775.460, "dur": 2.113, + "args": { + "External id": 993696,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942434776.302, "dur": 1.194, + "args": { + "External id": 993697,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942434782.328, "dur": 5.760, + "args": { + "External id": 993698,"Record function id": 0, "Ev Idx": 11681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942434784.182, "dur": 3.405, + "args": { + "External id": 993699,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942434785.394, "dur": 1.700, + "args": { + "External id": 993700,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942434785.977, "dur": 1.042, + "args": { + "External id": 993701,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942434791.758, "dur": 5.779, + "args": { + "External id": 993702,"Record function id": 0, "Ev Idx": 11685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942434793.796, "dur": 3.234, + "args": { + "External id": 993703,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942434794.442, "dur": 1.926, + "args": { + "External id": 993704,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942434795.179, "dur": 1.113, + "args": { + "External id": 993705,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942434801.352, "dur": 7.723, + "args": { + "External id": 993706,"Record function id": 0, "Ev Idx": 11689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942434803.231, "dur": 5.359, + "args": { + "External id": 993707,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942434804.256, "dur": 3.852, + "args": { + "External id": 993708,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942434804.955, "dur": 3.079, + "args": { + "External id": 993709,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942434812.735, "dur": 4.914, + "args": { + "External id": 993710,"Record function id": 0, "Ev Idx": 11693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942434814.484, "dur": 2.593, + "args": { + "External id": 993711,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942434815.178, "dur": 1.411, + "args": { + "External id": 993712,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942434815.721, "dur": 0.750, + "args": { + "External id": 993713,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942434821.927, "dur": 5.335, + "args": { + "External id": 993714,"Record function id": 0, "Ev Idx": 11697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942434823.648, "dur": 3.126, + "args": { + "External id": 993715,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942434824.626, "dur": 1.567, + "args": { + "External id": 993716,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942434825.348, "dur": 0.768, + "args": { + "External id": 993717,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345942434832.097, "dur": 61561.999, + "args": { + "External id": 993718,"Record function id": 0, "Sequence number": 10552472, "Fwd thread id": 1, "Ev Idx": 11701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345942434833.722, "dur": 61549.910, + "args": { + "External id": 993719,"Sequence number": 10552472, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11702 + } + }, + { + "ph": "f", "id": 447, "pid": 2338710, "tid": 2379450, "ts": 6345942434833.722, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.0)", "pid": 2338710, "tid": 2379450, + "ts": 6345942434871.611, "dur": 48.838, + "args": { + "External id": 993720,"Record function id": 0, "Ev Idx": 11703 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.0)", "pid": 2338710, "tid": 2379450, + "ts": 6345942434929.459, "dur": 103.145, + "args": { + "External id": 993721,"Record function id": 0, "Ev Idx": 11704 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.0)", "pid": 2338710, "tid": 2379450, + "ts": 6345942435042.509, "dur": 61330.842, + "args": { + "External id": 993722,"Record function id": 0, "Ev Idx": 11705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942435199.116, "dur": 9.418, + "args": { + "External id": 993723,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942435221.995, "dur": 6.217, + "args": { + "External id": 993724,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 11707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345942435245.722, "dur": 59991.908, + "args": { + "External id": 993725,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345942435263.620, "dur": 59957.327, + "args": { + "External id": 993726,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942435378.770, "dur": 22.596, + "args": { + "External id": 993727,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345942435425.665, "dur": 59741.014, + "args": { + "External id": 993728,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 11711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345942435430.144, "dur": 59735.206, + "args": { + "External id": 993729,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 11712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942435436.488, "dur": 9.385, + "args": { + "External id": 993730,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345942435448.837, "dur": 59709.825, + "args": { + "External id": 993731,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 11714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942495369.123, "dur": 14.462, + "args": { + "External id": 993732,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 11715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942495374.153, "dur": 8.988, + "args": { + "External id": 993733,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345942495419.285, "dur": 424.770, + "args": { + "External id": 993734,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 11717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345942495454.560, "dur": 383.413, + "args": { + "External id": 993735,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11718, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345942495468.227, "dur": 362.498, + "args": { + "External id": 993736,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 11719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345942495867.778, "dur": 2.576, + "args": { + "External id": 993737,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11720, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942495936.970, "dur": 8.856, + "args": { + "External id": 993738,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942495963.915, "dur": 43.012, + "args": { + "External id": 993739,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942496044.590, "dur": 3.747, + "args": { + "External id": 993740,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942496094.618, "dur": 24.109, + "args": { + "External id": 993741,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942496127.633, "dur": 3.008, + "args": { + "External id": 993742,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942496136.897, "dur": 16.391, + "args": { + "External id": 993743,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942496159.263, "dur": 1.550, + "args": { + "External id": 993744,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942496166.643, "dur": 13.512, + "args": { + "External id": 993745,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942496184.864, "dur": 3.688, + "args": { + "External id": 993746,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942496193.285, "dur": 12.958, + "args": { + "External id": 993747,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942496210.791, "dur": 0.979, + "args": { + "External id": 993748,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942496216.503, "dur": 12.649, + "args": { + "External id": 993749,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942496233.549, "dur": 1.112, + "args": { + "External id": 993750,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942496239.587, "dur": 14.638, + "args": { + "External id": 993751,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942496260.982, "dur": 1.054, + "args": { + "External id": 993752,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942496266.629, "dur": 12.109, + "args": { + "External id": 993753,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942496283.195, "dur": 1.208, + "args": { + "External id": 993754,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942496289.447, "dur": 13.820, + "args": { + "External id": 993755,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 11738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345942496413.672, "dur": 314.937, + "args": { + "External id": 993756,"Record function id": 0, "Sequence number": 10552471, "Fwd thread id": 1, "Ev Idx": 11739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338710, "tid": 2379450, + "ts": 6345942496416.803, "dur": 302.421, + "args": { + "External id": 993757,"Sequence number": 10552471, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11740 + } + }, + { + "ph": "f", "id": 448, "pid": 2338710, "tid": 2379450, "ts": 6345942496416.803, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_0", "pid": 2338710, "tid": 2379450, + "ts": 6345942496561.463, "dur": 45.477, + "args": { + "External id": 993758,"kernel_hash": "ci46ycwqu4mevlefaywvsre33v5ki6y5q5evkfmt4t2fie5todnp", "grid": "grid(131072000,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "131072000"], "kernel_file": "/tmp/torchinductor_cvm/i4/ci46ycwqu4mevlefaywvsre33v5ki6y5q5evkfmt4t2fie5todnp.py", "kernel_backend": "triton", "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 11741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_1", "pid": 2338710, "tid": 2379450, + "ts": 6345942496625.664, "dur": 29.448, + "args": { + "External id": 993759,"kernel_hash": "c4hfnruzhc7gtb7rldzmrokxpon7tcgieufpcwt6pxhd3syqv6zy", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/4h/c4hfnruzhc7gtb7rldzmrokxpon7tcgieufpcwt6pxhd3syqv6zy.py", "kernel_backend": "triton", "Input type": ["long int", "c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096], [8, 4096, 4096], [32000, 4096], []], "Ev Idx": 11742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_2", "pid": 2338710, "tid": 2379450, + "ts": 6345942496676.024, "dur": 25.397, + "args": { + "External id": 993760,"kernel_hash": "cagfbb4snc4nnt5qtlfe5j5npoahg3qjke5pbuljm6ouabijdwft", "grid": "grid(131072000,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "131072000"], "kernel_file": "/tmp/torchinductor_cvm/ag/cagfbb4snc4nnt5qtlfe5j5npoahg3qjke5pbuljm6ouabijdwft.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 11743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942496739.943, "dur": 20.517, + "args": { + "External id": 993761,"Record function id": 0, "Ev Idx": 11744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338710, "tid": 2379450, + "ts": 6345942496744.198, "dur": 15.134, + "args": { + "External id": 993762,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 11745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942496748.563, "dur": 9.701, + "args": { + "External id": 993763,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 11746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2379450, + "ts": 6345942496750.704, "dur": 7.443, + "args": { + "External id": 993764,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 11747 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::root_post_backward_callback", "pid": 2338710, "tid": 2379450, + "ts": 6345942496786.220, "dur": 16557.028, + "args": { + "External id": 993765,"Record function id": 0, "Ev Idx": 11748 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate", "pid": 2338710, "tid": 2379450, + "ts": 6345942496809.726, "dur": 46.555, + "args": { + "External id": 993766,"Record function id": 0, "Ev Idx": 11749 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard", "pid": 2338710, "tid": 2379450, + "ts": 6345942496863.576, "dur": 316.920, + "args": { + "External id": 993767,"Record function id": 0, "Ev Idx": 11750 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce", "pid": 2338710, "tid": 2379450, + "ts": 6345942497190.940, "dur": 15815.612, + "args": { + "External id": 993768,"Record function id": 0, "Ev Idx": 11751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942497347.698, "dur": 8.848, + "args": { + "External id": 993769,"Record function id": 0, "Concrete Inputs": ["[1134596096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2379450, + "ts": 6345942497369.463, "dur": 6.233, + "args": { + "External id": 993770,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1134596096], []], "Ev Idx": 11753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345942497400.519, "dur": 13525.548, + "args": { + "External id": 993771,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[], [], [], [141824512, 1]], "Input Dims": [[], [], [], [8, 141824512]], "Ev Idx": 11754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338710, "tid": 2379450, + "ts": 6345942497432.665, "dur": 13474.652, + "args": { + "External id": 993772,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[], [], [], [141824512, 1]], "Input Dims": [[], [], [], [8, 141824512]], "Ev Idx": 11755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942498408.958, "dur": 29.785, + "args": { + "External id": 993773,"Record function id": 0, "Concrete Inputs": ["[277237]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2379450, + "ts": 6345942498802.691, "dur": 12039.573, + "args": { + "External id": 993774,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[277237], [], [], [], [], [], [], []], "Ev Idx": 11757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2379450, + "ts": 6345942498808.709, "dur": 12032.068, + "args": { + "External id": 993775,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[277237], [], [], [], [], [], []], "Ev Idx": 11758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942498816.858, "dur": 17.637, + "args": { + "External id": 993776,"Record function id": 0, "Concrete Inputs": ["[277237]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2379450, + "ts": 6345942498838.014, "dur": 11993.701, + "args": { + "External id": 993777,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[277237], [277237], []], "Ev Idx": 11760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942511174.044, "dur": 18.345, + "args": { + "External id": 993778,"Record function id": 0, "Concrete Inputs": ["", "[141824512]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[1134596096], [], [], [], [], []], "Ev Idx": 11761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2379450, + "ts": 6345942511180.837, "dur": 10.825, + "args": { + "External id": 993779,"Record function id": 0, "Concrete Inputs": ["[141824512]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338710, "tid": 2379450, + "ts": 6345942511233.662, "dur": 437.853, + "args": { + "External id": 993780,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[141824512], [1134596096], [], [], [], []], "Ev Idx": 11763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345942511269.626, "dur": 394.766, + "args": { + "External id": 993781,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 141824512, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1134596096], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11764, "In msg nelems": 1134596096 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338710, "tid": 2379450, + "ts": 6345942511285.545, "dur": 371.911, + "args": { + "External id": 993782,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1134596096]], "Ev Idx": 11765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2379450, + "ts": 6345942511696.423, "dur": 3.676, + "args": { + "External id": 993783,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11766, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942511775.669, "dur": 8.713, + "args": { + "External id": 993784,"Record function id": 0, "Concrete Inputs": ["", "[4000, 4096]", "[4096, 1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942511802.371, "dur": 38.206, + "args": { + "External id": 993785,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4000, 4096], [4000, 4096], []], "Ev Idx": 11768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942511854.203, "dur": 1.725, + "args": { + "External id": 993786,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "16384000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942511863.607, "dur": 17.366, + "args": { + "External id": 993787,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942511887.379, "dur": 1.384, + "args": { + "External id": 993788,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "16384512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942511895.108, "dur": 17.326, + "args": { + "External id": 993789,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942511919.278, "dur": 1.231, + "args": { + "External id": 993790,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "18481664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942511925.604, "dur": 13.891, + "args": { + "External id": 993791,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942511944.511, "dur": 1.118, + "args": { + "External id": 993792,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "19005952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942511989.857, "dur": 15.254, + "args": { + "External id": 993793,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942512035.176, "dur": 3.303, + "args": { + "External id": 993794,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "19530240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942512046.066, "dur": 53.955, + "args": { + "External id": 993795,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942512110.064, "dur": 4.965, + "args": { + "External id": 993796,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "21627392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942512120.339, "dur": 14.907, + "args": { + "External id": 993797,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942512140.250, "dur": 0.813, + "args": { + "External id": 993798,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "21627904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942512145.179, "dur": 12.797, + "args": { + "External id": 993799,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942512162.649, "dur": 0.848, + "args": { + "External id": 993800,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "28967936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942512167.919, "dur": 12.205, + "args": { + "External id": 993801,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942512186.505, "dur": 0.834, + "args": { + "External id": 993802,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "36307968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942512193.379, "dur": 13.366, + "args": { + "External id": 993803,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 11786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942512211.997, "dur": 0.836, + "args": { + "External id": 993804,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "43648000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942512217.760, "dur": 12.645, + "args": { + "External id": 993805,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942512235.027, "dur": 0.844, + "args": { + "External id": 993806,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "43648512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942512239.848, "dur": 13.224, + "args": { + "External id": 993807,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942512257.880, "dur": 0.868, + "args": { + "External id": 993808,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "45745664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942512263.355, "dur": 14.818, + "args": { + "External id": 993809,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942512283.086, "dur": 0.846, + "args": { + "External id": 993810,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "46269952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942512288.365, "dur": 13.015, + "args": { + "External id": 993811,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942512306.453, "dur": 3.182, + "args": { + "External id": 993812,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "46794240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942512317.132, "dur": 16.209, + "args": { + "External id": 993813,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942512338.736, "dur": 0.870, + "args": { + "External id": 993814,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "48891392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942512344.004, "dur": 14.120, + "args": { + "External id": 993815,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942512363.271, "dur": 0.834, + "args": { + "External id": 993816,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "48891904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942512368.456, "dur": 14.736, + "args": { + "External id": 993817,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942512388.471, "dur": 0.857, + "args": { + "External id": 993818,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "56231936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942512393.486, "dur": 14.833, + "args": { + "External id": 993819,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942512414.953, "dur": 1.232, + "args": { + "External id": 993820,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "63571968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942512421.196, "dur": 14.088, + "args": { + "External id": 993821,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 11804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942512440.419, "dur": 1.025, + "args": { + "External id": 993822,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "70912000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942512445.005, "dur": 14.184, + "args": { + "External id": 993823,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942512464.490, "dur": 0.898, + "args": { + "External id": 993824,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "70912512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942512470.040, "dur": 13.948, + "args": { + "External id": 993825,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942512488.914, "dur": 0.853, + "args": { + "External id": 993826,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "73009664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942512493.826, "dur": 13.853, + "args": { + "External id": 993827,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942512512.755, "dur": 3.151, + "args": { + "External id": 993828,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "73533952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942512520.712, "dur": 14.947, + "args": { + "External id": 993829,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942512540.555, "dur": 0.902, + "args": { + "External id": 993830,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "74058240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942512545.445, "dur": 13.763, + "args": { + "External id": 993831,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942512564.121, "dur": 0.805, + "args": { + "External id": 993832,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "76155392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942512569.510, "dur": 14.597, + "args": { + "External id": 993833,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942512589.233, "dur": 0.929, + "args": { + "External id": 993834,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "76155904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942512594.757, "dur": 13.923, + "args": { + "External id": 993835,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942512614.011, "dur": 0.840, + "args": { + "External id": 993836,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "83495936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942512619.606, "dur": 14.527, + "args": { + "External id": 993837,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942512639.553, "dur": 0.874, + "args": { + "External id": 993838,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "90835968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942512646.464, "dur": 14.595, + "args": { + "External id": 993839,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 11822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942512665.735, "dur": 0.884, + "args": { + "External id": 993840,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "98176000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942512671.693, "dur": 14.057, + "args": { + "External id": 993841,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942512690.623, "dur": 1.141, + "args": { + "External id": 993842,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "98176512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942512696.847, "dur": 13.459, + "args": { + "External id": 993843,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942512716.116, "dur": 2.890, + "args": { + "External id": 993844,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "100273664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942512724.942, "dur": 15.018, + "args": { + "External id": 993845,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942512746.538, "dur": 0.911, + "args": { + "External id": 993846,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "100797952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942512753.340, "dur": 13.234, + "args": { + "External id": 993847,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942512771.866, "dur": 0.911, + "args": { + "External id": 993848,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "101322240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942512777.448, "dur": 13.895, + "args": { + "External id": 993849,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942512796.152, "dur": 1.050, + "args": { + "External id": 993850,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "103419392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942512801.192, "dur": 12.016, + "args": { + "External id": 993851,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942512818.029, "dur": 0.877, + "args": { + "External id": 993852,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "103419904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942512822.965, "dur": 13.750, + "args": { + "External id": 993853,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942512841.783, "dur": 0.748, + "args": { + "External id": 993854,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "110759936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942512846.014, "dur": 11.838, + "args": { + "External id": 993855,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942512863.723, "dur": 0.793, + "args": { + "External id": 993856,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "118099968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942512868.586, "dur": 12.888, + "args": { + "External id": 993857,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 11840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942512886.780, "dur": 0.925, + "args": { + "External id": 993858,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "125440000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942512892.412, "dur": 11.863, + "args": { + "External id": 993859,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2379450, + "ts": 6345942512909.868, "dur": 3.020, + "args": { + "External id": 993860,"Record function id": 0, "Concrete Inputs": ["", "[4000, 4096]", "[4096, 1]", "125440512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2379450, + "ts": 6345942512917.161, "dur": 13.783, + "args": { + "External id": 993861,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4000, 4096], [4000, 4096], []], "Ev Idx": 11844 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "ProfilerStep#22527", "pid": 2338710, "tid": 2338710, + "ts": 6345936068558.981, "dur": 6474177.955, + "args": { + "External id": 972801,"Record function id": 0, "Ev Idx": 11845 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "Optimizer.zero_grad#AdamW.zero_grad", "pid": 2338710, "tid": 2338710, + "ts": 6345936068597.434, "dur": 790.463, + "args": { + "External id": 972802,"Record function id": 0, "Ev Idx": 11846 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "enumerate(DataLoader)#_StatefulMultiProcessingDataLoaderIter.__next__", "pid": 2338710, "tid": 2338710, + "ts": 6345936069449.570, "dur": 112.127, + "args": { + "External id": 972803,"Record function id": 0, "Ev Idx": 11847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345936070339.442, "dur": 23.460, + "args": { + "External id": 972804,"Sequence number": 10552242, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], []], "Ev Idx": 11848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936070354.066, "dur": 3.295, + "args": { + "External id": 972805,"Record function id": 0, "Concrete Inputs": ["", "[8, 8192]", "[8192, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 8192], [], [], []], "Ev Idx": 11849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345936070365.967, "dur": 5.284, + "args": { + "External id": 972806,"Sequence number": 10552242, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], []], "Ev Idx": 11850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936070369.059, "dur": 0.923, + "args": { + "External id": 972807,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 8192], [], [], []], "Ev Idx": 11851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345936070402.187, "dur": 4536.586, + "args": { + "External id": 972808,"Sequence number": 10552242, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], [], [], []], "Ev Idx": 11852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936070411.055, "dur": 4527.018, + "args": { + "External id": 972809,"Sequence number": 10552242, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], [], []], "Ev Idx": 11853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936070421.718, "dur": 12.142, + "args": { + "External id": 972810,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "[4096, 1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936070436.668, "dur": 4500.039, + "args": { + "External id": 972811,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 11855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345936070448.960, "dur": 0.622, + "args": { + "External id": 972812,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 11856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand_as", "pid": 2338710, "tid": 2338710, + "ts": 6345936070453.666, "dur": 12.595, + "args": { + "External id": 972813,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["long int", "long int"], "Input Strides": [[8192, 1], [4096, 1]], "Input Dims": [[8, 4096], [8, 4096]], "Ev Idx": 11857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 2338710, "tid": 2338710, + "ts": 6345936070459.193, "dur": 6.872, + "args": { + "External id": 972814,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], []], "Input Dims": [[8, 4096], [], []], "Ev Idx": 11858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936070462.473, "dur": 3.254, + "args": { + "External id": 972815,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 11859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338710, "tid": 2338710, + "ts": 6345936070468.835, "dur": 140.299, + "args": { + "External id": 972816,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 11860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338710, "tid": 2338710, + "ts": 6345936070472.346, "dur": 136.379, + "args": { + "External id": 972817,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 11861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936070475.978, "dur": 11.109, + "args": { + "External id": 972818,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 11862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936070481.480, "dur": 5.057, + "args": { + "External id": 972819,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936070487.998, "dur": 120.179, + "args": { + "External id": 972820,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 11864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936070612.154, "dur": 4320.608, + "args": { + "External id": 972821,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 11865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345936074958.777, "dur": 423.365, + "args": { + "External id": 972822,"Sequence number": 10552242, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], [], [], [], []], "Ev Idx": 11866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936074962.135, "dur": 418.215, + "args": { + "External id": 972823,"Sequence number": 10552242, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], [], [], []], "Ev Idx": 11867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936074967.682, "dur": 10.826, + "args": { + "External id": 972824,"Record function id": 0, "Concrete Inputs": ["[8, 8192]", "[8192, 1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936074979.579, "dur": 397.309, + "args": { + "External id": 972825,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[8192, 1], [8192, 1], []], "Input Dims": [[8, 8192], [8, 8192], []], "Ev Idx": 11869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 2338710, "tid": 2338710, + "ts": 6345936075414.648, "dur": 73.213, + "args": { + "External id": 972826,"Record function id": 0, "Concrete Inputs": ["0", "4096", "", "", "", "False"], "Input type": ["Scalar", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936075421.909, "dur": 6.553, + "args": { + "External id": 972827,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 2338710, "tid": 2338710, + "ts": 6345936075432.895, "dur": 54.466, + "args": { + "External id": 972828,"Record function id": 0, "Concrete Inputs": ["0", "4096", "1", ""], "Input type": ["Scalar", "Scalar", "Scalar", "long int"], "Input Strides": [[], [], [], [1]], "Input Dims": [[], [], [], [0]], "Ev Idx": 11872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345936075445.029, "dur": 8.853, + "args": { + "External id": 972829,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["long int", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 11873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::repeat", "pid": 2338710, "tid": 2338710, + "ts": 6345936075502.790, "dur": 95.940, + "args": { + "External id": 972830,"Sequence number": 10552242, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 2338710, "tid": 2338710, + "ts": 6345936075510.319, "dur": 11.059, + "args": { + "External id": 972831,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[4096], [], []], "Ev Idx": 11875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936075517.579, "dur": 3.437, + "args": { + "External id": 972832,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096]", "[4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 11876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936075524.884, "dur": 4.612, + "args": { + "External id": 972833,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338710, "tid": 2338710, + "ts": 6345936075532.371, "dur": 5.849, + "args": { + "External id": 972834,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[4096, 1]], "Input Dims": [[8, 4096]], "Ev Idx": 11878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 2338710, "tid": 2338710, + "ts": 6345936075542.641, "dur": 9.381, + "args": { + "External id": 972835,"Record function id": 0, "Concrete Inputs": ["", "0", "1", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 11879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936075550.755, "dur": 0.882, + "args": { + "External id": 972836,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1]", "[4096, 1, 4096]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 11880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 2338710, "tid": 2338710, + "ts": 6345936075554.224, "dur": 2.913, + "args": { + "External id": 972837,"Record function id": 0, "Concrete Inputs": ["", "1", "4096", "4096"], "Input type": ["long int", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 4096], [], [], []], "Input Dims": [[8, 4096, 1], [], [], []], "Ev Idx": 11881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936075555.723, "dur": 1.310, + "args": { + "External id": 972838,"Record function id": 0, "Concrete Inputs": ["", "[8, 1, 1, 4096]", "[4096, 4096, 4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1, 4096], [], [], []], "Input Dims": [[8, 4096, 1], [], [], []], "Ev Idx": 11882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand_as", "pid": 2338710, "tid": 2338710, + "ts": 6345936075559.978, "dur": 5.690, + "args": { + "External id": 972839,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["long int", "long int"], "Input Strides": [[4096, 1], [4096, 4096, 4096, 1]], "Input Dims": [[1, 4096], [8, 1, 1, 4096]], "Ev Idx": 11883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 2338710, "tid": 2338710, + "ts": 6345936075561.392, "dur": 4.159, + "args": { + "External id": 972840,"Record function id": 0, "Concrete Inputs": ["", "[8, 1, 1, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1, 4096], [], []], "Ev Idx": 11884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936075563.104, "dur": 2.339, + "args": { + "External id": 972841,"Record function id": 0, "Concrete Inputs": ["", "[8, 1, 1, 4096]", "[0, 4096, 4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 11885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936075566.906, "dur": 29.891, + "args": { + "External id": 972842,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 4096, 4096, 1], [0, 4096, 4096, 1], []], "Input Dims": [[8, 1, 1, 4096], [8, 1, 1, 4096], []], "Ev Idx": 11886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345936075608.699, "dur": 42.639, + "args": { + "External id": 972843,"Sequence number": 10552242, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "3", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 11887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936075613.674, "dur": 37.401, + "args": { + "External id": 972844,"Sequence number": 10552242, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "3", "", "", "", "False", ""], "Input type": ["long int", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], [], []], "Ev Idx": 11888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936075620.659, "dur": 5.352, + "args": { + "External id": 972845,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "[4096, 1]", "3", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936075626.980, "dur": 22.267, + "args": { + "External id": 972846,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["int", "long int", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 11890 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::root_pre_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345936075792.600, "dur": 173.171, + "args": { + "External id": 972847,"Record function id": 0, "Ev Idx": 11891 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::inputs_to_device", "pid": 2338710, "tid": 2338710, + "ts": 6345936075892.026, "dur": 60.594, + "args": { + "External id": 972848,"Record function id": 0, "Ev Idx": 11892 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345936075973.640, "dur": 121.988, + "args": { + "External id": 972849,"Record function id": 0, "Ev Idx": 11893 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345936076112.065, "dur": 13332.468, + "args": { + "External id": 972850,"Record function id": 0, "Ev Idx": 11894 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather", "pid": 2338710, "tid": 2338710, + "ts": 6345936076122.060, "dur": 1588.556, + "args": { + "External id": 972851,"Record function id": 0, "Ev Idx": 11895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936076244.731, "dur": 12.435, + "args": { + "External id": 972852,"Record function id": 0, "Concrete Inputs": ["[141824512]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936076278.390, "dur": 169.275, + "args": { + "External id": 972853,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["c10::BFloat16", "", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[141824512], [], []], "Ev Idx": 11897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076285.371, "dur": 2.190, + "args": { + "External id": 972854,"Record function id": 0, "Concrete Inputs": ["", "[16384000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076295.852, "dur": 0.396, + "args": { + "External id": 972855,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "16384000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076298.100, "dur": 1.081, + "args": { + "External id": 972856,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "16384512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076300.173, "dur": 3.658, + "args": { + "External id": 972857,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "18481664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076310.835, "dur": 0.587, + "args": { + "External id": 972858,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19005952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076312.166, "dur": 0.552, + "args": { + "External id": 972859,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "19530240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076316.851, "dur": 2.894, + "args": { + "External id": 972860,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "21627392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076320.785, "dur": 0.727, + "args": { + "External id": 972861,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "21627904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076322.229, "dur": 0.577, + "args": { + "External id": 972862,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "28967936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076328.384, "dur": 0.381, + "args": { + "External id": 972863,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "36307968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076329.609, "dur": 0.343, + "args": { + "External id": 972864,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "43648000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076330.631, "dur": 3.207, + "args": { + "External id": 972865,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "43648512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076339.750, "dur": 0.397, + "args": { + "External id": 972866,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45745664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076340.911, "dur": 0.370, + "args": { + "External id": 972867,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46269952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076345.175, "dur": 2.445, + "args": { + "External id": 972868,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "46794240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076348.461, "dur": 0.374, + "args": { + "External id": 972869,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "48891392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076349.690, "dur": 0.346, + "args": { + "External id": 972870,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "48891904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076356.060, "dur": 0.273, + "args": { + "External id": 972871,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "56231936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076357.364, "dur": 0.326, + "args": { + "External id": 972872,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "63571968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076358.385, "dur": 2.801, + "args": { + "External id": 972873,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "70912000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076367.033, "dur": 0.348, + "args": { + "External id": 972874,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "70912512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076368.160, "dur": 0.386, + "args": { + "External id": 972875,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "73009664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076372.689, "dur": 2.069, + "args": { + "External id": 972876,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "73533952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076375.536, "dur": 0.582, + "args": { + "External id": 972877,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "74058240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076377.102, "dur": 0.385, + "args": { + "External id": 972878,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "76155392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076382.799, "dur": 0.282, + "args": { + "External id": 972879,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "76155904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076383.974, "dur": 0.343, + "args": { + "External id": 972880,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "83495936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076385.185, "dur": 2.400, + "args": { + "External id": 972881,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "90835968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076393.545, "dur": 0.584, + "args": { + "External id": 972882,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "98176000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076394.851, "dur": 0.678, + "args": { + "External id": 972883,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "98176512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076400.040, "dur": 2.656, + "args": { + "External id": 972884,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "100273664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076403.552, "dur": 0.254, + "args": { + "External id": 972885,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "100797952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076404.557, "dur": 0.615, + "args": { + "External id": 972886,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "101322240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076410.787, "dur": 0.279, + "args": { + "External id": 972887,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "103419392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076412.195, "dur": 0.378, + "args": { + "External id": 972888,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "103419904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076413.262, "dur": 2.690, + "args": { + "External id": 972889,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "110759936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076421.300, "dur": 0.402, + "args": { + "External id": 972890,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "118099968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076422.398, "dur": 0.323, + "args": { + "External id": 972891,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "125440000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076426.661, "dur": 2.159, + "args": { + "External id": 972892,"Record function id": 0, "Concrete Inputs": ["", "[16384000]", "[1]", "125440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936076477.554, "dur": 163.780, + "args": { + "External id": 972893,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 11937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345936076729.854, "dur": 427.193, + "args": { + "External id": 972894,"Record function id": 0, "Concrete Inputs": ["", "", "141824512", "8", "4", "15", ""], "Input type": ["TensorList", "", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 11938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936076750.267, "dur": 5.716, + "args": { + "External id": 972895,"Record function id": 0, "Concrete Inputs": ["[1134596096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345936076765.849, "dur": 15.767, + "args": { + "External id": 972896,"Record function id": 0, "Concrete Inputs": ["", "0", "567298048", "141824512"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1134596096], [], [], []], "Ev Idx": 11940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345936076771.327, "dur": 9.787, + "args": { + "External id": 972897,"Record function id": 0, "Concrete Inputs": ["", "0", "567298048", "709122560", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[1134596096], [], [], [], []], "Ev Idx": 11941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076776.919, "dur": 1.064, + "args": { + "External id": 972898,"Record function id": 0, "Concrete Inputs": ["", "[141824512]", "[1]", "567298048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1134596096], [], [], []], "Ev Idx": 11942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936076791.770, "dur": 135.344, + "args": { + "External id": 972899,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["c10::BFloat16", "", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[141824512], [], []], "Ev Idx": 11943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076793.846, "dur": 0.654, + "args": { + "External id": 972900,"Record function id": 0, "Concrete Inputs": ["", "[16384000]", "[1]", "567298048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076798.237, "dur": 0.528, + "args": { + "External id": 972901,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "583682048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076799.705, "dur": 2.700, + "args": { + "External id": 972902,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "583682560"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076803.162, "dur": 1.020, + "args": { + "External id": 972903,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "585779712"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076807.224, "dur": 0.398, + "args": { + "External id": 972904,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "586304000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076808.376, "dur": 0.619, + "args": { + "External id": 972905,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "586828288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076810.150, "dur": 0.700, + "args": { + "External id": 972906,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "588925440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076813.920, "dur": 0.953, + "args": { + "External id": 972907,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "588925952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076815.705, "dur": 0.968, + "args": { + "External id": 972908,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "596265984"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076820.279, "dur": 0.452, + "args": { + "External id": 972909,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "603606016"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076821.672, "dur": 3.244, + "args": { + "External id": 972910,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "610946048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076825.889, "dur": 0.373, + "args": { + "External id": 972911,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "610946560"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076829.178, "dur": 2.536, + "args": { + "External id": 972912,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "613043712"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076832.519, "dur": 0.563, + "args": { + "External id": 972913,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "613568000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076833.949, "dur": 0.843, + "args": { + "External id": 972914,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "614092288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076840.126, "dur": 0.407, + "args": { + "External id": 972915,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "616189440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076841.337, "dur": 0.519, + "args": { + "External id": 972916,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "616189952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076845.071, "dur": 0.319, + "args": { + "External id": 972917,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "623529984"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076848.226, "dur": 2.980, + "args": { + "External id": 972918,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "630870016"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076851.865, "dur": 0.298, + "args": { + "External id": 972919,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "638210048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076855.933, "dur": 2.068, + "args": { + "External id": 972920,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "638210560"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076858.846, "dur": 0.807, + "args": { + "External id": 972921,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "640307712"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076860.545, "dur": 0.542, + "args": { + "External id": 972922,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "640832000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076865.400, "dur": 0.370, + "args": { + "External id": 972923,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "641356288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076866.557, "dur": 0.536, + "args": { + "External id": 972924,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "643453440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076870.820, "dur": 0.360, + "args": { + "External id": 972925,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "643453952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076873.845, "dur": 2.715, + "args": { + "External id": 972926,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "650793984"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076877.189, "dur": 0.416, + "args": { + "External id": 972927,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "658134016"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076881.047, "dur": 1.931, + "args": { + "External id": 972928,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "665474048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076883.882, "dur": 0.370, + "args": { + "External id": 972929,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "665474560"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076885.250, "dur": 0.276, + "args": { + "External id": 972930,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "667571712"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076890.687, "dur": 0.266, + "args": { + "External id": 972931,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "668096000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076891.740, "dur": 0.576, + "args": { + "External id": 972932,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "668620288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076895.673, "dur": 0.350, + "args": { + "External id": 972933,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "670717440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076898.405, "dur": 2.628, + "args": { + "External id": 972934,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "670717952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076901.708, "dur": 0.519, + "args": { + "External id": 972935,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "678057984"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076905.836, "dur": 2.428, + "args": { + "External id": 972936,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "685398016"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076909.070, "dur": 0.555, + "args": { + "External id": 972937,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "692738048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936076910.540, "dur": 0.708, + "args": { + "External id": 972938,"Record function id": 0, "Concrete Inputs": ["", "[16384000]", "[1]", "692738560"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936076952.516, "dur": 181.336, + "args": { + "External id": 972939,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 11983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345936077234.027, "dur": 352.390, + "args": { + "External id": 972940,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[1134596096], [141824512], [], [], []], "Ev Idx": 11984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936077277.622, "dur": 302.883, + "args": { + "External id": 972941,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 1134596096, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[141824512], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11985, "In msg nelems": 141824512 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345936077290.220, "dur": 283.407, + "args": { + "External id": 972942,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[141824512]], "Ev Idx": 11986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936077614.514, "dur": 2.704, + "args": { + "External id": 972943,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11987, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out", "pid": 2338710, "tid": 2338710, + "ts": 6345936077729.641, "dur": 11321.529, + "args": { + "External id": 972944,"Record function id": 0, "Ev Idx": 11988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936077935.739, "dur": 8.013, + "args": { + "External id": 972945,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1134596096], []], "Ev Idx": 11989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936077947.850, "dur": 1.591, + "args": { + "External id": 972946,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[131072000], []], "Ev Idx": 11990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936077951.568, "dur": 1.316, + "args": { + "External id": 972947,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936077957.410, "dur": 3.428, + "args": { + "External id": 972948,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 11992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936077962.576, "dur": 1.080, + "args": { + "External id": 972949,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 11993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936077965.410, "dur": 1.156, + "args": { + "External id": 972950,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 11994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936077968.462, "dur": 1.074, + "args": { + "External id": 972951,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 11995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936077976.648, "dur": 2.349, + "args": { + "External id": 972952,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936077980.449, "dur": 0.695, + "args": { + "External id": 972953,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936077983.025, "dur": 0.905, + "args": { + "External id": 972954,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936077985.526, "dur": 1.099, + "args": { + "External id": 972955,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936077990.518, "dur": 3.478, + "args": { + "External id": 972956,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936077995.496, "dur": 0.995, + "args": { + "External id": 972957,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936077998.085, "dur": 1.142, + "args": { + "External id": 972958,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936078000.652, "dur": 1.116, + "args": { + "External id": 972959,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936078028.189, "dur": 4.772, + "args": { + "External id": 972960,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936078036.877, "dur": 0.888, + "args": { + "External id": 972961,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936078039.634, "dur": 1.087, + "args": { + "External id": 972962,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936078042.688, "dur": 1.237, + "args": { + "External id": 972963,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936078048.490, "dur": 3.110, + "args": { + "External id": 972964,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936078091.573, "dur": 2.829, + "args": { + "External id": 972965,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936078098.337, "dur": 1.056, + "args": { + "External id": 972966,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936078100.905, "dur": 0.816, + "args": { + "External id": 972967,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936078108.386, "dur": 2.346, + "args": { + "External id": 972968,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936078112.290, "dur": 0.867, + "args": { + "External id": 972969,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936078115.070, "dur": 0.671, + "args": { + "External id": 972970,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936078117.337, "dur": 1.136, + "args": { + "External id": 972971,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936078122.375, "dur": 3.490, + "args": { + "External id": 972972,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936078127.505, "dur": 0.717, + "args": { + "External id": 972973,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936078129.964, "dur": 0.974, + "args": { + "External id": 972974,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936078132.436, "dur": 0.921, + "args": { + "External id": 972975,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936078139.331, "dur": 2.134, + "args": { + "External id": 972976,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936078142.829, "dur": 0.988, + "args": { + "External id": 972977,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936078145.386, "dur": 0.973, + "args": { + "External id": 972978,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936078148.572, "dur": 0.803, + "args": { + "External id": 972979,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936078153.057, "dur": 3.324, + "args": { + "External id": 972980,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936078157.761, "dur": 0.863, + "args": { + "External id": 972981,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936078178.839, "dur": 1.050, + "args": { + "External id": 972982,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936078181.367, "dur": 0.663, + "args": { + "External id": 972983,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936078188.378, "dur": 2.697, + "args": { + "External id": 972984,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[131072000], []], "Ev Idx": 12028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936078225.737, "dur": 10679.682, + "args": { + "External id": 972985,"Record function id": 0, "Concrete Inputs": ["", "", "1", ""], "Input type": ["c10::BFloat16", "", "Scalar", "TensorList"], "Input Strides": [[141824512, 1], [], [], []], "Input Dims": [[8, 141824512], [], [], []], "Ev Idx": 12029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936078255.449, "dur": 10627.630, + "args": { + "External id": 972986,"Record function id": 0, "Concrete Inputs": ["", "", "1", ""], "Input type": ["c10::BFloat16", "", "Scalar", "TensorList"], "Input Strides": [[141824512, 1], [], [], []], "Input Dims": [[8, 141824512], [], [], []], "Ev Idx": 12030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936078282.815, "dur": 7.656, + "args": { + "External id": 972987,"Record function id": 0, "Concrete Inputs": ["[4384]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345936078296.908, "dur": 10463.111, + "args": { + "External id": 972988,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[4384], [], [], [], [], [], [], []], "Ev Idx": 12032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936078300.159, "dur": 10458.295, + "args": { + "External id": 972989,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[4384], [], [], [], [], [], []], "Ev Idx": 12033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936078308.523, "dur": 6.558, + "args": { + "External id": 972990,"Record function id": 0, "Concrete Inputs": ["[4384]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936078317.154, "dur": 10432.520, + "args": { + "External id": 972991,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4384], [4384], []], "Ev Idx": 12035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345936089577.335, "dur": 48.060, + "args": { + "External id": 972992,"Record function id": 0, "Ev Idx": 12036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 0/0", "pid": 2338710, "tid": 2338710, + "ts": 6345936089629.003, "dur": 325.212, + "args": { + "External id": 972993,"Record function id": 0, "Ev Idx": 12037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936089688.326, "dur": 252.230, + "args": { + "External id": 972994,"Sequence number": 10552242, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "long int"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32000, 4096], [8, 4096]], "Ev Idx": 12038 + } + }, + { + "ph": "s", "id": 224, "pid": 2338710, "tid": 2338710, "ts": 6345936089688.326, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936089800.671, "dur": 87.310, + "args": { + "External id": 972995,"kernel_hash": "cvykvfdpfag3zvgkkgm7dmy4omdmsmvjcopgv6veoygd3dudukzf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/vy/cvykvfdpfag3zvgkkgm7dmy4omdmsmvjcopgv6veoygd3dudukzf.py", "kernel_backend": "triton", "Input type": ["long int", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096], [32000, 4096], [8, 4096, 4096], []], "Ev Idx": 12039 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345936090164.967, "dur": 100.104, + "args": { + "External id": 972996,"Record function id": 0, "Ev Idx": 12040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.0)", "pid": 2338710, "tid": 2338710, + "ts": 6345936090282.958, "dur": 8905.365, + "args": { + "External id": 972997,"Record function id": 0, "Ev Idx": 12041 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.0)", "pid": 2338710, "tid": 2338710, + "ts": 6345936090295.590, "dur": 1489.085, + "args": { + "External id": 972998,"Record function id": 0, "Ev Idx": 12042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936090407.007, "dur": 21.079, + "args": { + "External id": 972999,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936090449.167, "dur": 61.683, + "args": { + "External id": 973000,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936090461.692, "dur": 4.272, + "args": { + "External id": 973001,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936090469.334, "dur": 0.811, + "args": { + "External id": 973002,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936090474.292, "dur": 2.544, + "args": { + "External id": 973003,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936090478.575, "dur": 0.649, + "args": { + "External id": 973004,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936090480.675, "dur": 0.796, + "args": { + "External id": 973005,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936090488.125, "dur": 0.381, + "args": { + "External id": 973006,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936090490.006, "dur": 0.595, + "args": { + "External id": 973007,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936090492.195, "dur": 3.424, + "args": { + "External id": 973008,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936090500.958, "dur": 0.662, + "args": { + "External id": 973009,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936090525.326, "dur": 90.096, + "args": { + "External id": 973010,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345936090673.012, "dur": 186.981, + "args": { + "External id": 973011,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936090694.347, "dur": 7.715, + "args": { + "External id": 973012,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345936090710.251, "dur": 14.354, + "args": { + "External id": 973013,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345936090716.219, "dur": 7.741, + "args": { + "External id": 973014,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936090720.816, "dur": 0.959, + "args": { + "External id": 973015,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936090735.787, "dur": 44.395, + "args": { + "External id": 973016,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936090740.859, "dur": 0.780, + "args": { + "External id": 973017,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936090743.581, "dur": 0.820, + "args": { + "External id": 973018,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936090747.789, "dur": 0.440, + "args": { + "External id": 973019,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936090752.119, "dur": 0.799, + "args": { + "External id": 973020,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936090754.411, "dur": 4.899, + "args": { + "External id": 973021,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936090760.995, "dur": 0.453, + "args": { + "External id": 973022,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936090765.283, "dur": 0.727, + "args": { + "External id": 973023,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936090769.076, "dur": 0.453, + "args": { + "External id": 973024,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936090773.498, "dur": 0.463, + "args": { + "External id": 973025,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936090796.643, "dur": 49.024, + "args": { + "External id": 973026,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345936090944.294, "dur": 665.996, + "args": { + "External id": 973027,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936090986.641, "dur": 613.651, + "args": { + "External id": 973028,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12072, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345936091003.729, "dur": 586.649, + "args": { + "External id": 973029,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936091654.767, "dur": 4.009, + "args": { + "External id": 973030,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12074, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.0)", "pid": 2338710, "tid": 2338710, + "ts": 6345936091820.084, "dur": 6979.035, + "args": { + "External id": 973031,"Record function id": 0, "Ev Idx": 12075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936091977.806, "dur": 9.803, + "args": { + "External id": 973032,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936091993.286, "dur": 1.590, + "args": { + "External id": 973033,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936091997.911, "dur": 1.600, + "args": { + "External id": 973034,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936092002.750, "dur": 4.413, + "args": { + "External id": 973035,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936092046.527, "dur": 2.284, + "args": { + "External id": 973036,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936092051.473, "dur": 67.029, + "args": { + "External id": 973037,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936092130.947, "dur": 2.041, + "args": { + "External id": 973038,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936092136.462, "dur": 3.092, + "args": { + "External id": 973039,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936092142.308, "dur": 1.555, + "args": { + "External id": 973040,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936092146.373, "dur": 1.289, + "args": { + "External id": 973041,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936092181.560, "dur": 6526.950, + "args": { + "External id": 973042,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936092210.232, "dur": 6481.185, + "args": { + "External id": 973043,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936092246.878, "dur": 24.482, + "args": { + "External id": 973044,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345936092277.814, "dur": 6350.249, + "args": { + "External id": 973045,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936092281.638, "dur": 6344.941, + "args": { + "External id": 973046,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936092289.675, "dur": 14.210, + "args": { + "External id": 973047,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936092307.066, "dur": 6311.043, + "args": { + "External id": 973048,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936099053.856, "dur": 94.281, + "args": { + "External id": 973049,"Sequence number": 10552243, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12093 + } + }, + { + "ph": "s", "id": 223, "pid": 2338710, "tid": 2338710, "ts": 6345936099053.856, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345936099119.583, "dur": 21.361, + "args": { + "External id": 973050,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936099129.937, "dur": 10.534, + "args": { + "External id": 973051,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345936099247.836, "dur": 118.876, + "args": { + "External id": 973052,"Record function id": 0, "Ev Idx": 12096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345936099368.301, "dur": 1404.773, + "args": { + "External id": 973053,"Record function id": 0, "Ev Idx": 12097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936099421.634, "dur": 1333.144, + "args": { + "External id": 973054,"Sequence number": 10552244, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12098 + } + }, + { + "ph": "s", "id": 222, "pid": 2338710, "tid": 2338710, "ts": 6345936099421.634, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936099514.004, "dur": 61.621, + "args": { + "External id": 973055,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936099594.569, "dur": 122.141, + "args": { + "External id": 973056,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936099732.185, "dur": 47.053, + "args": { + "External id": 973057,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936099790.339, "dur": 34.405, + "args": { + "External id": 973058,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936099861.144, "dur": 31.790, + "args": { + "External id": 973059,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345936099918.381, "dur": 23.427, + "args": { + "External id": 973060,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345936099968.770, "dur": 241.132, + "args": { + "External id": 973061,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936100054.849, "dur": 63.537, + "args": { + "External id": 973062,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936100103.839, "dur": 12.112, + "args": { + "External id": 973063,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936100124.391, "dur": 5.334, + "args": { + "External id": 973064,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936100130.971, "dur": 1.501, + "args": { + "External id": 973065,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936100135.466, "dur": 3.938, + "args": { + "External id": 973066,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936100227.130, "dur": 73.458, + "args": { + "External id": 973067,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345936100351.385, "dur": 39.728, + "args": { + "External id": 973068,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936100400.359, "dur": 50.366, + "args": { + "External id": 973069,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936100460.166, "dur": 41.928, + "args": { + "External id": 973070,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936100531.221, "dur": 29.178, + "args": { + "External id": 973071,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936100569.251, "dur": 43.964, + "args": { + "External id": 973072,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936100640.247, "dur": 24.263, + "args": { + "External id": 973073,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12117 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.0)", "pid": 2338710, "tid": 2338710, + "ts": 6345936100851.646, "dur": 100.210, + "args": { + "External id": 973074,"Record function id": 0, "Ev Idx": 12118 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345936101139.684, "dur": 64.983, + "args": { + "External id": 973075,"Record function id": 0, "Ev Idx": 12119 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.1)", "pid": 2338710, "tid": 2338710, + "ts": 6345936101216.371, "dur": 27121.031, + "args": { + "External id": 973076,"Record function id": 0, "Ev Idx": 12120 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.1)", "pid": 2338710, "tid": 2338710, + "ts": 6345936101228.227, "dur": 1123.356, + "args": { + "External id": 973077,"Record function id": 0, "Ev Idx": 12121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936101327.249, "dur": 13.059, + "args": { + "External id": 973078,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936101360.418, "dur": 46.247, + "args": { + "External id": 973079,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936101366.273, "dur": 4.861, + "args": { + "External id": 973080,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936101373.192, "dur": 0.973, + "args": { + "External id": 973081,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936101377.770, "dur": 0.494, + "args": { + "External id": 973082,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936101381.477, "dur": 0.450, + "args": { + "External id": 973083,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936101382.988, "dur": 3.242, + "args": { + "External id": 973084,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936101388.948, "dur": 0.532, + "args": { + "External id": 973085,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936101391.488, "dur": 0.458, + "args": { + "External id": 973086,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936101395.371, "dur": 0.351, + "args": { + "External id": 973087,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936101396.335, "dur": 2.960, + "args": { + "External id": 973088,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936101419.330, "dur": 72.085, + "args": { + "External id": 973089,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345936101534.546, "dur": 150.721, + "args": { + "External id": 973090,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936101550.320, "dur": 4.642, + "args": { + "External id": 973091,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345936101561.860, "dur": 12.124, + "args": { + "External id": 973092,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345936101567.338, "dur": 6.137, + "args": { + "External id": 973093,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936101571.332, "dur": 0.561, + "args": { + "External id": 973094,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936101581.620, "dur": 39.576, + "args": { + "External id": 973095,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936101586.716, "dur": 0.820, + "args": { + "External id": 973096,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936101589.051, "dur": 3.148, + "args": { + "External id": 973097,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936101593.114, "dur": 3.137, + "args": { + "External id": 973098,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936101599.061, "dur": 0.538, + "args": { + "External id": 973099,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936101600.885, "dur": 0.574, + "args": { + "External id": 973100,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936101606.595, "dur": 0.405, + "args": { + "External id": 973101,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936101607.711, "dur": 0.622, + "args": { + "External id": 973102,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936101609.262, "dur": 0.768, + "args": { + "External id": 973103,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936101615.624, "dur": 0.588, + "args": { + "External id": 973104,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936101635.635, "dur": 37.518, + "args": { + "External id": 973105,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345936101748.733, "dur": 475.817, + "args": { + "External id": 973106,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936101787.491, "dur": 429.749, + "args": { + "External id": 973107,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12151, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345936101800.200, "dur": 409.995, + "args": { + "External id": 973108,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936102258.965, "dur": 3.116, + "args": { + "External id": 973109,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12153, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.1)", "pid": 2338710, "tid": 2338710, + "ts": 6345936102375.439, "dur": 25556.382, + "args": { + "External id": 973110,"Record function id": 0, "Ev Idx": 12154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936102493.846, "dur": 7.697, + "args": { + "External id": 973111,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936102506.040, "dur": 1.371, + "args": { + "External id": 973112,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936102509.238, "dur": 1.420, + "args": { + "External id": 973113,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936102512.855, "dur": 1.810, + "args": { + "External id": 973114,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936102516.141, "dur": 1.164, + "args": { + "External id": 973115,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936102521.483, "dur": 1.462, + "args": { + "External id": 973116,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936102524.561, "dur": 1.130, + "args": { + "External id": 973117,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936102527.671, "dur": 4.761, + "args": { + "External id": 973118,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936102533.911, "dur": 0.886, + "args": { + "External id": 973119,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936102538.507, "dur": 0.829, + "args": { + "External id": 973120,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936102562.228, "dur": 25289.435, + "args": { + "External id": 973121,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936102580.733, "dur": 25256.209, + "args": { + "External id": 973122,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936102619.152, "dur": 18.123, + "args": { + "External id": 973123,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345936102641.643, "dur": 25129.760, + "args": { + "External id": 973124,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936102645.108, "dur": 25125.139, + "args": { + "External id": 973125,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936102651.680, "dur": 6.067, + "args": { + "External id": 973126,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936102659.701, "dur": 25105.016, + "args": { + "External id": 973127,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936128251.377, "dur": 50.525, + "args": { + "External id": 973128,"Sequence number": 10552245, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12172 + } + }, + { + "ph": "s", "id": 221, "pid": 2338710, "tid": 2338710, "ts": 6345936128251.377, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345936128279.930, "dur": 15.253, + "args": { + "External id": 973129,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936128286.317, "dur": 8.312, + "args": { + "External id": 973130,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345936128389.162, "dur": 91.478, + "args": { + "External id": 973131,"Record function id": 0, "Ev Idx": 12175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345936128482.081, "dur": 1796.573, + "args": { + "External id": 973132,"Record function id": 0, "Ev Idx": 12176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936128526.166, "dur": 1735.339, + "args": { + "External id": 973133,"Sequence number": 10552246, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12177 + } + }, + { + "ph": "s", "id": 220, "pid": 2338710, "tid": 2338710, "ts": 6345936128526.166, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936128633.554, "dur": 73.035, + "args": { + "External id": 973134,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936128735.749, "dur": 156.478, + "args": { + "External id": 973135,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936128912.855, "dur": 62.971, + "args": { + "External id": 973136,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936128986.481, "dur": 148.236, + "args": { + "External id": 973137,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936129202.833, "dur": 43.971, + "args": { + "External id": 973138,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345936129290.999, "dur": 40.113, + "args": { + "External id": 973139,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345936129368.543, "dur": 221.242, + "args": { + "External id": 973140,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936129456.050, "dur": 22.427, + "args": { + "External id": 973141,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936129467.517, "dur": 9.480, + "args": { + "External id": 973142,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936129482.063, "dur": 9.233, + "args": { + "External id": 973143,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936129493.517, "dur": 4.580, + "args": { + "External id": 973144,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936129501.896, "dur": 5.505, + "args": { + "External id": 973145,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936129607.299, "dur": 104.181, + "args": { + "External id": 973146,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345936129762.368, "dur": 46.117, + "args": { + "External id": 973147,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936129823.790, "dur": 58.901, + "args": { + "External id": 973148,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936129892.351, "dur": 41.881, + "args": { + "External id": 973149,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936129964.408, "dur": 31.069, + "args": { + "External id": 973150,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936130002.335, "dur": 114.085, + "args": { + "External id": 973151,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936130148.987, "dur": 26.760, + "args": { + "External id": 973152,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12196 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.1)", "pid": 2338710, "tid": 2338710, + "ts": 6345936130360.797, "dur": 97.695, + "args": { + "External id": 973153,"Record function id": 0, "Ev Idx": 12197 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345936130548.238, "dur": 58.223, + "args": { + "External id": 973154,"Record function id": 0, "Ev Idx": 12198 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.2)", "pid": 2338710, "tid": 2338710, + "ts": 6345936130616.690, "dur": 27217.569, + "args": { + "External id": 973155,"Record function id": 0, "Ev Idx": 12199 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.2)", "pid": 2338710, "tid": 2338710, + "ts": 6345936130627.775, "dur": 1112.294, + "args": { + "External id": 973156,"Record function id": 0, "Ev Idx": 12200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936130724.161, "dur": 10.846, + "args": { + "External id": 973157,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936130749.997, "dur": 45.383, + "args": { + "External id": 973158,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936130756.040, "dur": 2.622, + "args": { + "External id": 973159,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936130765.733, "dur": 0.490, + "args": { + "External id": 973160,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936130767.203, "dur": 0.555, + "args": { + "External id": 973161,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936130768.698, "dur": 0.686, + "args": { + "External id": 973162,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936130775.153, "dur": 0.579, + "args": { + "External id": 973163,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936130776.836, "dur": 0.638, + "args": { + "External id": 973164,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936130780.353, "dur": 4.598, + "args": { + "External id": 973165,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936130786.037, "dur": 0.401, + "args": { + "External id": 973166,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936130787.055, "dur": 0.384, + "args": { + "External id": 973167,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936130809.683, "dur": 63.935, + "args": { + "External id": 973168,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345936130911.199, "dur": 219.398, + "args": { + "External id": 973169,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936130926.702, "dur": 4.800, + "args": { + "External id": 973170,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345936130937.790, "dur": 11.473, + "args": { + "External id": 973171,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345936130943.163, "dur": 5.579, + "args": { + "External id": 973172,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936130946.892, "dur": 0.473, + "args": { + "External id": 973173,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936130958.871, "dur": 37.834, + "args": { + "External id": 973174,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936130960.660, "dur": 2.621, + "args": { + "External id": 973175,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936130964.465, "dur": 0.606, + "args": { + "External id": 973176,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936130968.927, "dur": 0.808, + "args": { + "External id": 973177,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936130973.590, "dur": 3.540, + "args": { + "External id": 973178,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936130979.865, "dur": 0.327, + "args": { + "External id": 973179,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936130981.032, "dur": 0.651, + "args": { + "External id": 973180,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936130984.339, "dur": 0.434, + "args": { + "External id": 973181,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936130987.682, "dur": 0.362, + "args": { + "External id": 973182,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936130988.703, "dur": 3.067, + "args": { + "External id": 973183,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936131030.885, "dur": 87.806, + "args": { + "External id": 973184,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345936131203.681, "dur": 424.384, + "args": { + "External id": 973185,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936131243.717, "dur": 378.643, + "args": { + "External id": 973186,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12230, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345936131260.579, "dur": 355.503, + "args": { + "External id": 973187,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936131656.850, "dur": 2.772, + "args": { + "External id": 973188,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12232, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.2)", "pid": 2338710, "tid": 2338710, + "ts": 6345936131762.989, "dur": 25829.118, + "args": { + "External id": 973189,"Record function id": 0, "Ev Idx": 12233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936131876.788, "dur": 7.339, + "args": { + "External id": 973190,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936131887.690, "dur": 1.252, + "args": { + "External id": 973191,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936131891.049, "dur": 3.719, + "args": { + "External id": 973192,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936131896.748, "dur": 0.865, + "args": { + "External id": 973193,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936131899.134, "dur": 1.228, + "args": { + "External id": 973194,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936131901.692, "dur": 1.251, + "args": { + "External id": 973195,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936131907.006, "dur": 1.091, + "args": { + "External id": 973196,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936131909.647, "dur": 2.703, + "args": { + "External id": 973197,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936131913.980, "dur": 0.783, + "args": { + "External id": 973198,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936131916.121, "dur": 0.532, + "args": { + "External id": 973199,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936131940.775, "dur": 25594.215, + "args": { + "External id": 973200,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936131958.348, "dur": 25565.888, + "args": { + "External id": 973201,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936131981.218, "dur": 19.601, + "args": { + "External id": 973202,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345936132005.447, "dur": 25474.894, + "args": { + "External id": 973203,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936132031.051, "dur": 25448.117, + "args": { + "External id": 973204,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936132040.598, "dur": 7.174, + "args": { + "External id": 973205,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936132050.197, "dur": 25424.655, + "args": { + "External id": 973206,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936157763.880, "dur": 39.857, + "args": { + "External id": 973207,"Sequence number": 10552247, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12251 + } + }, + { + "ph": "s", "id": 219, "pid": 2338710, "tid": 2338710, "ts": 6345936157763.880, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345936157785.133, "dur": 12.121, + "args": { + "External id": 973208,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936157790.800, "dur": 6.223, + "args": { + "External id": 973209,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345936157886.994, "dur": 91.910, + "args": { + "External id": 973210,"Record function id": 0, "Ev Idx": 12254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345936157981.062, "dur": 1410.556, + "args": { + "External id": 973211,"Record function id": 0, "Ev Idx": 12255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936158088.460, "dur": 1285.301, + "args": { + "External id": 973212,"Sequence number": 10552248, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12256 + } + }, + { + "ph": "s", "id": 218, "pid": 2338710, "tid": 2338710, "ts": 6345936158088.460, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936158183.330, "dur": 62.123, + "args": { + "External id": 973213,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936158262.273, "dur": 123.736, + "args": { + "External id": 973214,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936158401.496, "dur": 43.906, + "args": { + "External id": 973215,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936158456.117, "dur": 34.150, + "args": { + "External id": 973216,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936158519.024, "dur": 29.127, + "args": { + "External id": 973217,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345936158570.244, "dur": 20.855, + "args": { + "External id": 973218,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345936158619.397, "dur": 159.656, + "args": { + "External id": 973219,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936158680.174, "dur": 16.387, + "args": { + "External id": 973220,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936158688.909, "dur": 6.777, + "args": { + "External id": 973221,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936158700.464, "dur": 5.758, + "args": { + "External id": 973222,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936158707.523, "dur": 1.294, + "args": { + "External id": 973223,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936158711.508, "dur": 5.909, + "args": { + "External id": 973224,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936158793.086, "dur": 59.512, + "args": { + "External id": 973225,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345936158889.955, "dur": 34.691, + "args": { + "External id": 973226,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936158935.302, "dur": 52.480, + "args": { + "External id": 973227,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936158997.051, "dur": 114.544, + "args": { + "External id": 973228,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936159147.427, "dur": 31.186, + "args": { + "External id": 973229,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936159188.847, "dur": 52.044, + "args": { + "External id": 973230,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936159265.539, "dur": 24.310, + "args": { + "External id": 973231,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12275 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.2)", "pid": 2338710, "tid": 2338710, + "ts": 6345936159469.617, "dur": 93.767, + "args": { + "External id": 973232,"Record function id": 0, "Ev Idx": 12276 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345936159652.443, "dur": 56.445, + "args": { + "External id": 973233,"Record function id": 0, "Ev Idx": 12277 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.3)", "pid": 2338710, "tid": 2338710, + "ts": 6345936159719.143, "dur": 27546.054, + "args": { + "External id": 973234,"Record function id": 0, "Ev Idx": 12278 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.3)", "pid": 2338710, "tid": 2338710, + "ts": 6345936159732.071, "dur": 1182.303, + "args": { + "External id": 973235,"Record function id": 0, "Ev Idx": 12279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936159826.819, "dur": 11.712, + "args": { + "External id": 973236,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936159853.813, "dur": 46.715, + "args": { + "External id": 973237,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936159860.018, "dur": 2.635, + "args": { + "External id": 973238,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936159869.809, "dur": 0.392, + "args": { + "External id": 973239,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936159871.253, "dur": 0.487, + "args": { + "External id": 973240,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936159875.166, "dur": 0.635, + "args": { + "External id": 973241,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936159879.265, "dur": 0.439, + "args": { + "External id": 973242,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936159880.755, "dur": 0.729, + "args": { + "External id": 973243,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936159885.230, "dur": 5.021, + "args": { + "External id": 973244,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936159891.133, "dur": 0.294, + "args": { + "External id": 973245,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936159892.272, "dur": 0.378, + "args": { + "External id": 973246,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936159914.361, "dur": 64.540, + "args": { + "External id": 973247,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345936160113.917, "dur": 181.269, + "args": { + "External id": 973248,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936160133.243, "dur": 9.404, + "args": { + "External id": 973249,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345936160149.948, "dur": 16.495, + "args": { + "External id": 973250,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345936160155.494, "dur": 10.193, + "args": { + "External id": 973251,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936160162.521, "dur": 0.930, + "args": { + "External id": 973252,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936160175.301, "dur": 36.173, + "args": { + "External id": 973253,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936160177.390, "dur": 0.662, + "args": { + "External id": 973254,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936160181.781, "dur": 2.483, + "args": { + "External id": 973255,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936160185.293, "dur": 0.568, + "args": { + "External id": 973256,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936160186.802, "dur": 2.849, + "args": { + "External id": 973257,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936160195.386, "dur": 0.368, + "args": { + "External id": 973258,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936160196.423, "dur": 0.525, + "args": { + "External id": 973259,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936160198.021, "dur": 0.468, + "args": { + "External id": 973260,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936160204.591, "dur": 0.423, + "args": { + "External id": 973261,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936160205.854, "dur": 0.580, + "args": { + "External id": 973262,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936160238.017, "dur": 46.373, + "args": { + "External id": 973263,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345936160364.894, "dur": 437.171, + "args": { + "External id": 973264,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936160407.118, "dur": 389.129, + "args": { + "External id": 973265,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12309, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345936160421.329, "dur": 368.329, + "args": { + "External id": 973266,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936160831.023, "dur": 3.033, + "args": { + "External id": 973267,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12311, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.3)", "pid": 2338710, "tid": 2338710, + "ts": 6345936160939.236, "dur": 26018.346, + "args": { + "External id": 973268,"Record function id": 0, "Ev Idx": 12312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936161128.357, "dur": 7.800, + "args": { + "External id": 973269,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936161140.394, "dur": 1.680, + "args": { + "External id": 973270,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936161144.028, "dur": 3.586, + "args": { + "External id": 973271,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936161149.758, "dur": 1.474, + "args": { + "External id": 973272,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936161152.810, "dur": 1.046, + "args": { + "External id": 973273,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936161155.448, "dur": 1.147, + "args": { + "External id": 973274,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936161160.914, "dur": 1.204, + "args": { + "External id": 973275,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936161163.855, "dur": 2.788, + "args": { + "External id": 973276,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936161168.130, "dur": 0.760, + "args": { + "External id": 973277,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936161173.472, "dur": 1.039, + "args": { + "External id": 973278,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936161198.656, "dur": 25701.700, + "args": { + "External id": 973279,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936161218.956, "dur": 25671.442, + "args": { + "External id": 973280,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936161241.991, "dur": 20.916, + "args": { + "External id": 973281,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345936161267.058, "dur": 25577.582, + "args": { + "External id": 973282,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936161270.099, "dur": 25573.015, + "args": { + "External id": 973283,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936161279.490, "dur": 7.157, + "args": { + "External id": 973284,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936161288.551, "dur": 25550.717, + "args": { + "External id": 973285,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936187184.798, "dur": 48.899, + "args": { + "External id": 973286,"Sequence number": 10552249, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12330 + } + }, + { + "ph": "s", "id": 217, "pid": 2338710, "tid": 2338710, "ts": 6345936187184.798, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345936187207.825, "dur": 19.198, + "args": { + "External id": 973287,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936187219.478, "dur": 7.046, + "args": { + "External id": 973288,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345936187315.487, "dur": 88.782, + "args": { + "External id": 973289,"Record function id": 0, "Ev Idx": 12333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345936187408.459, "dur": 1323.436, + "args": { + "External id": 973290,"Record function id": 0, "Ev Idx": 12334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936187454.318, "dur": 1260.717, + "args": { + "External id": 973291,"Sequence number": 10552250, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12335 + } + }, + { + "ph": "s", "id": 216, "pid": 2338710, "tid": 2338710, "ts": 6345936187454.318, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936187536.843, "dur": 61.768, + "args": { + "External id": 973292,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936187613.744, "dur": 124.165, + "args": { + "External id": 973293,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936187757.533, "dur": 45.306, + "args": { + "External id": 973294,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936187810.037, "dur": 34.254, + "args": { + "External id": 973295,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936187874.631, "dur": 29.753, + "args": { + "External id": 973296,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345936187926.029, "dur": 20.419, + "args": { + "External id": 973297,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345936187975.541, "dur": 235.313, + "args": { + "External id": 973298,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936188102.319, "dur": 17.863, + "args": { + "External id": 973299,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936188109.679, "dur": 8.774, + "args": { + "External id": 973300,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936188123.719, "dur": 4.488, + "args": { + "External id": 973301,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936188129.841, "dur": 1.274, + "args": { + "External id": 973302,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936188136.426, "dur": 6.160, + "args": { + "External id": 973303,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936188227.590, "dur": 66.337, + "args": { + "External id": 973304,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345936188337.046, "dur": 37.645, + "args": { + "External id": 973305,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936188386.120, "dur": 50.969, + "args": { + "External id": 973306,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936188445.115, "dur": 39.909, + "args": { + "External id": 973307,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936188509.060, "dur": 32.044, + "args": { + "External id": 973308,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936188547.356, "dur": 42.378, + "args": { + "External id": 973309,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936188611.656, "dur": 22.518, + "args": { + "External id": 973310,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12354 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.3)", "pid": 2338710, "tid": 2338710, + "ts": 6345936188809.542, "dur": 91.277, + "args": { + "External id": 973311,"Record function id": 0, "Ev Idx": 12355 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345936188988.355, "dur": 124.466, + "args": { + "External id": 973312,"Record function id": 0, "Ev Idx": 12356 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.4)", "pid": 2338710, "tid": 2338710, + "ts": 6345936189152.033, "dur": 31724.219, + "args": { + "External id": 973313,"Record function id": 0, "Ev Idx": 12357 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.4)", "pid": 2338710, "tid": 2338710, + "ts": 6345936189168.342, "dur": 1118.783, + "args": { + "External id": 973314,"Record function id": 0, "Ev Idx": 12358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936189266.129, "dur": 13.090, + "args": { + "External id": 973315,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936189295.081, "dur": 46.530, + "args": { + "External id": 973316,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936189303.445, "dur": 2.631, + "args": { + "External id": 973317,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936189310.467, "dur": 0.462, + "args": { + "External id": 973318,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936189311.685, "dur": 0.406, + "args": { + "External id": 973319,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936189315.623, "dur": 2.488, + "args": { + "External id": 973320,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936189318.954, "dur": 0.476, + "args": { + "External id": 973321,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936189322.969, "dur": 0.590, + "args": { + "External id": 973322,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936189326.674, "dur": 2.642, + "args": { + "External id": 973323,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936189330.097, "dur": 0.362, + "args": { + "External id": 973324,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936189333.468, "dur": 0.389, + "args": { + "External id": 973325,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936189354.056, "dur": 66.877, + "args": { + "External id": 973326,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345936189460.936, "dur": 144.119, + "args": { + "External id": 973327,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936189476.544, "dur": 4.453, + "args": { + "External id": 973328,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345936189486.983, "dur": 15.917, + "args": { + "External id": 973329,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345936189492.641, "dur": 9.727, + "args": { + "External id": 973330,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936189498.164, "dur": 2.636, + "args": { + "External id": 973331,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936189511.098, "dur": 34.057, + "args": { + "External id": 973332,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936189512.983, "dur": 0.372, + "args": { + "External id": 973333,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936189517.292, "dur": 0.638, + "args": { + "External id": 973334,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936189518.837, "dur": 0.573, + "args": { + "External id": 973335,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936189524.970, "dur": 2.771, + "args": { + "External id": 973336,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936189528.522, "dur": 0.342, + "args": { + "External id": 973337,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936189529.590, "dur": 2.463, + "args": { + "External id": 973338,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936189535.461, "dur": 0.421, + "args": { + "External id": 973339,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936189536.616, "dur": 0.285, + "args": { + "External id": 973340,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936189539.790, "dur": 0.294, + "args": { + "External id": 973341,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936189559.994, "dur": 36.105, + "args": { + "External id": 973342,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345936189667.655, "dur": 497.442, + "args": { + "External id": 973343,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936189709.376, "dur": 448.551, + "args": { + "External id": 973344,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12388, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345936189721.283, "dur": 426.623, + "args": { + "External id": 973345,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936190195.490, "dur": 2.876, + "args": { + "External id": 973346,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12390, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.4)", "pid": 2338710, "tid": 2338710, + "ts": 6345936190313.133, "dur": 30297.180, + "args": { + "External id": 973347,"Record function id": 0, "Ev Idx": 12391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936190428.991, "dur": 7.835, + "args": { + "External id": 973348,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936190441.035, "dur": 1.419, + "args": { + "External id": 973349,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936190444.451, "dur": 3.730, + "args": { + "External id": 973350,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936190450.014, "dur": 0.816, + "args": { + "External id": 973351,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936190452.579, "dur": 1.225, + "args": { + "External id": 973352,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936190455.531, "dur": 0.774, + "args": { + "External id": 973353,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936190460.389, "dur": 0.756, + "args": { + "External id": 973354,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936190465.274, "dur": 2.247, + "args": { + "External id": 973355,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936190468.855, "dur": 1.163, + "args": { + "External id": 973356,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936190471.510, "dur": 0.970, + "args": { + "External id": 973357,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936190494.987, "dur": 30053.888, + "args": { + "External id": 973358,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936190515.386, "dur": 30022.213, + "args": { + "External id": 973359,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936190539.370, "dur": 20.470, + "args": { + "External id": 973360,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345936190564.265, "dur": 29928.489, + "args": { + "External id": 973361,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936190567.150, "dur": 29924.403, + "args": { + "External id": 973362,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936190574.206, "dur": 7.444, + "args": { + "External id": 973363,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936190583.396, "dur": 29903.440, + "args": { + "External id": 973364,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936220797.571, "dur": 47.638, + "args": { + "External id": 973365,"Sequence number": 10552251, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12409 + } + }, + { + "ph": "s", "id": 215, "pid": 2338710, "tid": 2338710, "ts": 6345936220797.571, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345936220821.653, "dur": 16.860, + "args": { + "External id": 973366,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936220831.273, "dur": 6.922, + "args": { + "External id": 973367,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345936220929.540, "dur": 105.133, + "args": { + "External id": 973368,"Record function id": 0, "Ev Idx": 12412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345936221037.909, "dur": 1396.101, + "args": { + "External id": 973369,"Record function id": 0, "Ev Idx": 12413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936221131.819, "dur": 1284.740, + "args": { + "External id": 973370,"Sequence number": 10552252, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12414 + } + }, + { + "ph": "s", "id": 214, "pid": 2338710, "tid": 2338710, "ts": 6345936221131.819, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936221223.674, "dur": 65.132, + "args": { + "External id": 973371,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936221308.146, "dur": 119.655, + "args": { + "External id": 973372,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936221442.868, "dur": 44.021, + "args": { + "External id": 973373,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936221497.339, "dur": 33.688, + "args": { + "External id": 973374,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936221562.258, "dur": 32.272, + "args": { + "External id": 973375,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345936221620.507, "dur": 19.935, + "args": { + "External id": 973376,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345936221667.915, "dur": 163.609, + "args": { + "External id": 973377,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936221728.866, "dur": 15.328, + "args": { + "External id": 973378,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936221735.809, "dur": 7.236, + "args": { + "External id": 973379,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936221748.202, "dur": 5.491, + "args": { + "External id": 973380,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936221756.928, "dur": 1.354, + "args": { + "External id": 973381,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936221761.029, "dur": 7.596, + "args": { + "External id": 973382,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936221845.202, "dur": 60.398, + "args": { + "External id": 973383,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345936221941.974, "dur": 34.395, + "args": { + "External id": 973384,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936221986.949, "dur": 111.055, + "args": { + "External id": 973385,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936222114.626, "dur": 49.674, + "args": { + "External id": 973386,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936222199.499, "dur": 31.052, + "args": { + "External id": 973387,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936222240.352, "dur": 45.426, + "args": { + "External id": 973388,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936222307.818, "dur": 23.460, + "args": { + "External id": 973389,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12433 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.4)", "pid": 2338710, "tid": 2338710, + "ts": 6345936222511.691, "dur": 90.519, + "args": { + "External id": 973390,"Record function id": 0, "Ev Idx": 12434 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345936222692.976, "dur": 54.042, + "args": { + "External id": 973391,"Record function id": 0, "Ev Idx": 12435 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.5)", "pid": 2338710, "tid": 2338710, + "ts": 6345936222756.851, "dur": 32026.145, + "args": { + "External id": 973392,"Record function id": 0, "Ev Idx": 12436 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.5)", "pid": 2338710, "tid": 2338710, + "ts": 6345936222769.270, "dur": 1196.727, + "args": { + "External id": 973393,"Record function id": 0, "Ev Idx": 12437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936222862.920, "dur": 11.724, + "args": { + "External id": 973394,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936222891.012, "dur": 47.017, + "args": { + "External id": 973395,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936222899.568, "dur": 2.706, + "args": { + "External id": 973396,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936222906.853, "dur": 0.788, + "args": { + "External id": 973397,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936222910.947, "dur": 0.481, + "args": { + "External id": 973398,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936222912.381, "dur": 0.475, + "args": { + "External id": 973399,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936222915.631, "dur": 0.472, + "args": { + "External id": 973400,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936222919.488, "dur": 0.507, + "args": { + "External id": 973401,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936222920.573, "dur": 5.650, + "args": { + "External id": 973402,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936222926.883, "dur": 0.491, + "args": { + "External id": 973403,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936222930.599, "dur": 0.447, + "args": { + "External id": 973404,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936222950.114, "dur": 89.368, + "args": { + "External id": 973405,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345936223129.662, "dur": 169.795, + "args": { + "External id": 973406,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936223151.558, "dur": 6.887, + "args": { + "External id": 973407,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345936223165.273, "dur": 12.793, + "args": { + "External id": 973408,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345936223171.198, "dur": 6.395, + "args": { + "External id": 973409,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936223174.859, "dur": 0.841, + "args": { + "External id": 973410,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936223186.447, "dur": 40.104, + "args": { + "External id": 973411,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936223190.898, "dur": 3.022, + "args": { + "External id": 973412,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936223195.072, "dur": 0.821, + "args": { + "External id": 973413,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936223196.637, "dur": 0.447, + "args": { + "External id": 973414,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936223202.666, "dur": 3.407, + "args": { + "External id": 973415,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936223207.167, "dur": 0.304, + "args": { + "External id": 973416,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936223208.293, "dur": 0.307, + "args": { + "External id": 973417,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936223214.478, "dur": 0.591, + "args": { + "External id": 973418,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936223215.896, "dur": 0.637, + "args": { + "External id": 973419,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936223219.233, "dur": 2.380, + "args": { + "External id": 973420,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936223245.426, "dur": 43.733, + "args": { + "External id": 973421,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345936223363.867, "dur": 489.387, + "args": { + "External id": 973422,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936223403.232, "dur": 444.671, + "args": { + "External id": 973423,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12467, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345936223415.375, "dur": 426.367, + "args": { + "External id": 973424,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936223880.587, "dur": 2.511, + "args": { + "External id": 973425,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12469, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.5)", "pid": 2338710, "tid": 2338710, + "ts": 6345936223989.089, "dur": 30548.275, + "args": { + "External id": 973426,"Record function id": 0, "Ev Idx": 12470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936224176.274, "dur": 8.409, + "args": { + "External id": 973427,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936224189.209, "dur": 1.162, + "args": { + "External id": 973428,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936224192.554, "dur": 3.369, + "args": { + "External id": 973429,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936224197.831, "dur": 1.307, + "args": { + "External id": 973430,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936224200.651, "dur": 0.914, + "args": { + "External id": 973431,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936224205.635, "dur": 1.197, + "args": { + "External id": 973432,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936224211.649, "dur": 1.110, + "args": { + "External id": 973433,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936224214.290, "dur": 2.588, + "args": { + "External id": 973434,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936224218.302, "dur": 1.174, + "args": { + "External id": 973435,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936224223.160, "dur": 0.929, + "args": { + "External id": 973436,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936224246.263, "dur": 30234.177, + "args": { + "External id": 973437,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936224277.357, "dur": 30191.891, + "args": { + "External id": 973438,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936224296.890, "dur": 20.987, + "args": { + "External id": 973439,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345936224323.471, "dur": 30101.311, + "args": { + "External id": 973440,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936224326.511, "dur": 30097.221, + "args": { + "External id": 973441,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936224333.469, "dur": 6.520, + "args": { + "External id": 973442,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936224342.051, "dur": 30077.194, + "args": { + "External id": 973443,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936254709.063, "dur": 42.180, + "args": { + "External id": 973444,"Sequence number": 10552253, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12488 + } + }, + { + "ph": "s", "id": 213, "pid": 2338710, "tid": 2338710, "ts": 6345936254709.063, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345936254730.354, "dur": 14.430, + "args": { + "External id": 973445,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936254738.290, "dur": 6.187, + "args": { + "External id": 973446,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345936254832.745, "dur": 87.998, + "args": { + "External id": 973447,"Record function id": 0, "Ev Idx": 12491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345936254923.447, "dur": 1426.768, + "args": { + "External id": 973448,"Record function id": 0, "Ev Idx": 12492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936254967.909, "dur": 1364.757, + "args": { + "External id": 973449,"Sequence number": 10552254, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12493 + } + }, + { + "ph": "s", "id": 212, "pid": 2338710, "tid": 2338710, "ts": 6345936254967.909, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936255109.181, "dur": 65.758, + "args": { + "External id": 973450,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936255196.410, "dur": 124.105, + "args": { + "External id": 973451,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936255336.659, "dur": 49.953, + "args": { + "External id": 973452,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936255399.192, "dur": 36.906, + "args": { + "External id": 973453,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936255469.663, "dur": 30.752, + "args": { + "External id": 973454,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345936255522.237, "dur": 19.575, + "args": { + "External id": 973455,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345936255569.045, "dur": 173.233, + "args": { + "External id": 973456,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936255629.607, "dur": 14.852, + "args": { + "External id": 973457,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936255636.531, "dur": 6.979, + "args": { + "External id": 973458,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936255650.335, "dur": 6.892, + "args": { + "External id": 973459,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936255658.919, "dur": 1.316, + "args": { + "External id": 973460,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936255673.657, "dur": 5.059, + "args": { + "External id": 973461,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936255755.573, "dur": 61.854, + "args": { + "External id": 973462,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345936255860.700, "dur": 35.052, + "args": { + "External id": 973463,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936255906.732, "dur": 52.382, + "args": { + "External id": 973464,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936255965.761, "dur": 39.780, + "args": { + "External id": 973465,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936256100.346, "dur": 39.205, + "args": { + "External id": 973466,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936256148.241, "dur": 50.566, + "args": { + "External id": 973467,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936256225.438, "dur": 21.214, + "args": { + "External id": 973468,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12512 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.5)", "pid": 2338710, "tid": 2338710, + "ts": 6345936256431.051, "dur": 93.860, + "args": { + "External id": 973469,"Record function id": 0, "Ev Idx": 12513 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345936256615.088, "dur": 54.283, + "args": { + "External id": 973470,"Record function id": 0, "Ev Idx": 12514 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.6)", "pid": 2338710, "tid": 2338710, + "ts": 6345936256679.622, "dur": 31784.805, + "args": { + "External id": 973471,"Record function id": 0, "Ev Idx": 12515 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.6)", "pid": 2338710, "tid": 2338710, + "ts": 6345936256691.362, "dur": 1112.369, + "args": { + "External id": 973472,"Record function id": 0, "Ev Idx": 12516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936256784.811, "dur": 11.758, + "args": { + "External id": 973473,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936256814.154, "dur": 43.468, + "args": { + "External id": 973474,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936256819.837, "dur": 2.582, + "args": { + "External id": 973475,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936256827.075, "dur": 0.506, + "args": { + "External id": 973476,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936256831.362, "dur": 0.600, + "args": { + "External id": 973477,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936256833.118, "dur": 0.727, + "args": { + "External id": 973478,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936256836.827, "dur": 0.509, + "args": { + "External id": 973479,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936256840.443, "dur": 0.557, + "args": { + "External id": 973480,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936256841.846, "dur": 4.179, + "args": { + "External id": 973481,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936256848.804, "dur": 0.387, + "args": { + "External id": 973482,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936256849.924, "dur": 0.468, + "args": { + "External id": 973483,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936256869.259, "dur": 62.383, + "args": { + "External id": 973484,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345936256973.944, "dur": 223.588, + "args": { + "External id": 973485,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936256990.398, "dur": 4.124, + "args": { + "External id": 973486,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345936257000.519, "dur": 35.606, + "args": { + "External id": 973487,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345936257006.030, "dur": 29.390, + "args": { + "External id": 973488,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936257030.979, "dur": 0.941, + "args": { + "External id": 973489,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936257045.365, "dur": 80.617, + "args": { + "External id": 973490,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936257050.087, "dur": 2.638, + "args": { + "External id": 973491,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936257053.690, "dur": 0.527, + "args": { + "External id": 973492,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936257054.981, "dur": 38.915, + "args": { + "External id": 973493,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936257102.023, "dur": 2.795, + "args": { + "External id": 973494,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936257105.717, "dur": 0.475, + "args": { + "External id": 973495,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936257108.925, "dur": 0.542, + "args": { + "External id": 973496,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936257112.357, "dur": 0.410, + "args": { + "External id": 973497,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936257113.958, "dur": 0.378, + "args": { + "External id": 973498,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936257117.566, "dur": 2.832, + "args": { + "External id": 973499,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936257143.350, "dur": 44.262, + "args": { + "External id": 973500,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345936257265.633, "dur": 425.088, + "args": { + "External id": 973501,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936257306.767, "dur": 377.999, + "args": { + "External id": 973502,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12546, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345936257318.728, "dur": 359.755, + "args": { + "External id": 973503,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936257718.890, "dur": 2.866, + "args": { + "External id": 973504,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12548, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.6)", "pid": 2338710, "tid": 2338710, + "ts": 6345936257828.441, "dur": 30394.638, + "args": { + "External id": 973505,"Record function id": 0, "Ev Idx": 12549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936257938.802, "dur": 6.308, + "args": { + "External id": 973506,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936257949.000, "dur": 1.057, + "args": { + "External id": 973507,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936257952.234, "dur": 3.913, + "args": { + "External id": 973508,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936257958.041, "dur": 0.975, + "args": { + "External id": 973509,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936257960.662, "dur": 1.161, + "args": { + "External id": 973510,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936257963.419, "dur": 0.996, + "args": { + "External id": 973511,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936257968.248, "dur": 1.171, + "args": { + "External id": 973512,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936257971.253, "dur": 2.122, + "args": { + "External id": 973513,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936257974.882, "dur": 0.781, + "args": { + "External id": 973514,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936257977.235, "dur": 0.744, + "args": { + "External id": 973515,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936258002.180, "dur": 30159.939, + "args": { + "External id": 973516,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936258045.576, "dur": 30104.959, + "args": { + "External id": 973517,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936258106.729, "dur": 22.385, + "args": { + "External id": 973518,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345936258135.269, "dur": 29967.362, + "args": { + "External id": 973519,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936258138.465, "dur": 29963.120, + "args": { + "External id": 973520,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936258145.784, "dur": 8.019, + "args": { + "External id": 973521,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936258155.992, "dur": 29940.988, + "args": { + "External id": 973522,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936288395.302, "dur": 40.572, + "args": { + "External id": 973523,"Sequence number": 10552255, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12567 + } + }, + { + "ph": "s", "id": 211, "pid": 2338710, "tid": 2338710, "ts": 6345936288395.302, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345936288417.802, "dur": 12.003, + "args": { + "External id": 973524,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936288423.152, "dur": 6.415, + "args": { + "External id": 973525,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345936288513.366, "dur": 87.724, + "args": { + "External id": 973526,"Record function id": 0, "Ev Idx": 12570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345936288603.863, "dur": 1318.921, + "args": { + "External id": 973527,"Record function id": 0, "Ev Idx": 12571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936288649.326, "dur": 1255.326, + "args": { + "External id": 973528,"Sequence number": 10552256, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12572 + } + }, + { + "ph": "s", "id": 210, "pid": 2338710, "tid": 2338710, "ts": 6345936288649.326, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936288738.180, "dur": 58.345, + "args": { + "External id": 973529,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936288812.589, "dur": 119.658, + "args": { + "External id": 973530,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936288947.945, "dur": 44.298, + "args": { + "External id": 973531,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936289002.098, "dur": 101.293, + "args": { + "External id": 973532,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936289146.094, "dur": 32.891, + "args": { + "External id": 973533,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345936289201.481, "dur": 19.971, + "args": { + "External id": 973534,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345936289250.653, "dur": 160.591, + "args": { + "External id": 973535,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936289312.146, "dur": 17.336, + "args": { + "External id": 973536,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936289321.773, "dur": 6.712, + "args": { + "External id": 973537,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936289333.550, "dur": 4.425, + "args": { + "External id": 973538,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936289339.309, "dur": 1.141, + "args": { + "External id": 973539,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936289343.110, "dur": 6.970, + "args": { + "External id": 973540,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936289423.270, "dur": 67.937, + "args": { + "External id": 973541,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345936289527.655, "dur": 33.477, + "args": { + "External id": 973542,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936289571.112, "dur": 49.937, + "args": { + "External id": 973543,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936289630.329, "dur": 40.574, + "args": { + "External id": 973544,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936289695.960, "dur": 27.505, + "args": { + "External id": 973545,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936289732.049, "dur": 40.857, + "args": { + "External id": 973546,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936289797.003, "dur": 19.730, + "args": { + "External id": 973547,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12591 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.6)", "pid": 2338710, "tid": 2338710, + "ts": 6345936289999.560, "dur": 162.264, + "args": { + "External id": 973548,"Record function id": 0, "Ev Idx": 12592 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345936290259.254, "dur": 58.461, + "args": { + "External id": 973549,"Record function id": 0, "Ev Idx": 12593 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.7)", "pid": 2338710, "tid": 2338710, + "ts": 6345936290328.983, "dur": 31292.051, + "args": { + "External id": 973550,"Record function id": 0, "Ev Idx": 12594 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.7)", "pid": 2338710, "tid": 2338710, + "ts": 6345936290340.145, "dur": 1101.176, + "args": { + "External id": 973551,"Record function id": 0, "Ev Idx": 12595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936290435.258, "dur": 12.154, + "args": { + "External id": 973552,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936290462.865, "dur": 43.679, + "args": { + "External id": 973553,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936290469.039, "dur": 2.464, + "args": { + "External id": 973554,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936290478.205, "dur": 0.370, + "args": { + "External id": 973555,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936290479.653, "dur": 0.364, + "args": { + "External id": 973556,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936290480.839, "dur": 0.511, + "args": { + "External id": 973557,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936290486.645, "dur": 0.365, + "args": { + "External id": 973558,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936290487.801, "dur": 0.644, + "args": { + "External id": 973559,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936290491.360, "dur": 5.345, + "args": { + "External id": 973560,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936290497.750, "dur": 0.328, + "args": { + "External id": 973561,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936290499.000, "dur": 0.330, + "args": { + "External id": 973562,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936290520.365, "dur": 69.701, + "args": { + "External id": 973563,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345936290630.146, "dur": 144.518, + "args": { + "External id": 973564,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936290646.076, "dur": 4.637, + "args": { + "External id": 973565,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345936290656.626, "dur": 11.670, + "args": { + "External id": 973566,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345936290662.010, "dur": 5.622, + "args": { + "External id": 973567,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936290665.620, "dur": 0.696, + "args": { + "External id": 973568,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936290677.217, "dur": 34.513, + "args": { + "External id": 973569,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936290678.913, "dur": 2.406, + "args": { + "External id": 973570,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936290682.343, "dur": 0.484, + "args": { + "External id": 973571,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936290685.731, "dur": 0.455, + "args": { + "External id": 973572,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936290689.501, "dur": 2.607, + "args": { + "External id": 973573,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936290694.727, "dur": 0.522, + "args": { + "External id": 973574,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936290696.038, "dur": 0.441, + "args": { + "External id": 973575,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936290699.783, "dur": 0.487, + "args": { + "External id": 973576,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936290703.260, "dur": 0.275, + "args": { + "External id": 973577,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936290704.533, "dur": 2.287, + "args": { + "External id": 973578,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936290728.166, "dur": 37.788, + "args": { + "External id": 973579,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345936290836.965, "dur": 480.457, + "args": { + "External id": 973580,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936290874.006, "dur": 436.135, + "args": { + "External id": 973581,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12625, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345936290887.391, "dur": 414.400, + "args": { + "External id": 973582,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936291350.545, "dur": 3.170, + "args": { + "External id": 973583,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12627, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.7)", "pid": 2338710, "tid": 2338710, + "ts": 6345936291466.250, "dur": 29924.586, + "args": { + "External id": 973584,"Record function id": 0, "Ev Idx": 12628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936291580.659, "dur": 7.690, + "args": { + "External id": 973585,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936291592.425, "dur": 0.961, + "args": { + "External id": 973586,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936291595.390, "dur": 3.455, + "args": { + "External id": 973587,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936291610.685, "dur": 0.933, + "args": { + "External id": 973588,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936291613.269, "dur": 0.852, + "args": { + "External id": 973589,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936291615.689, "dur": 1.157, + "args": { + "External id": 973590,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936291621.161, "dur": 0.837, + "args": { + "External id": 973591,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936291625.372, "dur": 2.026, + "args": { + "External id": 973592,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936291628.805, "dur": 0.577, + "args": { + "External id": 973593,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936291630.722, "dur": 0.816, + "args": { + "External id": 973594,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936291657.582, "dur": 29677.936, + "args": { + "External id": 973595,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936291676.526, "dur": 29648.240, + "args": { + "External id": 973596,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936291693.971, "dur": 18.632, + "args": { + "External id": 973597,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345936291717.190, "dur": 29563.766, + "args": { + "External id": 973598,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936291720.076, "dur": 29559.558, + "args": { + "External id": 973599,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936291726.699, "dur": 7.710, + "args": { + "External id": 973600,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936291736.121, "dur": 29538.912, + "args": { + "External id": 973601,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936321550.651, "dur": 41.327, + "args": { + "External id": 973602,"Sequence number": 10552257, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12646 + } + }, + { + "ph": "s", "id": 209, "pid": 2338710, "tid": 2338710, "ts": 6345936321550.651, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345936321572.108, "dur": 13.959, + "args": { + "External id": 973603,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936321579.660, "dur": 6.154, + "args": { + "External id": 973604,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345936321670.345, "dur": 88.647, + "args": { + "External id": 973605,"Record function id": 0, "Ev Idx": 12649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345936321760.532, "dur": 1366.064, + "args": { + "External id": 973606,"Record function id": 0, "Ev Idx": 12650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936321809.471, "dur": 1299.845, + "args": { + "External id": 973607,"Sequence number": 10552258, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12651 + } + }, + { + "ph": "s", "id": 208, "pid": 2338710, "tid": 2338710, "ts": 6345936321809.471, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936321892.194, "dur": 56.947, + "args": { + "External id": 973608,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936321963.513, "dur": 177.965, + "args": { + "External id": 973609,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936322164.461, "dur": 50.126, + "args": { + "External id": 973610,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936322224.842, "dur": 34.070, + "args": { + "External id": 973611,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936322291.838, "dur": 31.678, + "args": { + "External id": 973612,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345936322350.665, "dur": 21.454, + "args": { + "External id": 973613,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345936322401.362, "dur": 159.973, + "args": { + "External id": 973614,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936322464.187, "dur": 13.473, + "args": { + "External id": 973615,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936322471.133, "dur": 5.722, + "args": { + "External id": 973616,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936322480.424, "dur": 4.509, + "args": { + "External id": 973617,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936322488.017, "dur": 1.099, + "args": { + "External id": 973618,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936322491.728, "dur": 7.141, + "args": { + "External id": 973619,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936322573.749, "dur": 59.227, + "args": { + "External id": 973620,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345936322668.361, "dur": 34.433, + "args": { + "External id": 973621,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936322713.503, "dur": 49.660, + "args": { + "External id": 973622,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936322772.649, "dur": 40.227, + "args": { + "External id": 973623,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936322847.118, "dur": 31.156, + "args": { + "External id": 973624,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936322887.250, "dur": 40.728, + "args": { + "External id": 973625,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936322947.714, "dur": 20.081, + "args": { + "External id": 973626,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12670 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.7)", "pid": 2338710, "tid": 2338710, + "ts": 6345936323205.402, "dur": 91.942, + "args": { + "External id": 973627,"Record function id": 0, "Ev Idx": 12671 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345936323382.550, "dur": 54.481, + "args": { + "External id": 973628,"Record function id": 0, "Ev Idx": 12672 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.8)", "pid": 2338710, "tid": 2338710, + "ts": 6345936323447.028, "dur": 32998.269, + "args": { + "External id": 973629,"Record function id": 0, "Ev Idx": 12673 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.8)", "pid": 2338710, "tid": 2338710, + "ts": 6345936323459.908, "dur": 1098.203, + "args": { + "External id": 973630,"Record function id": 0, "Ev Idx": 12674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936323552.673, "dur": 12.206, + "args": { + "External id": 973631,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936323579.159, "dur": 44.726, + "args": { + "External id": 973632,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936323586.714, "dur": 2.689, + "args": { + "External id": 973633,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936323593.580, "dur": 0.568, + "args": { + "External id": 973634,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936323597.779, "dur": 0.440, + "args": { + "External id": 973635,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936323599.215, "dur": 0.529, + "args": { + "External id": 973636,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936323603.109, "dur": 0.442, + "args": { + "External id": 973637,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936323606.130, "dur": 0.489, + "args": { + "External id": 973638,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936323608.079, "dur": 4.263, + "args": { + "External id": 973639,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936323613.268, "dur": 0.440, + "args": { + "External id": 973640,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936323616.683, "dur": 0.427, + "args": { + "External id": 973641,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936323636.127, "dur": 64.245, + "args": { + "External id": 973642,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345936323740.547, "dur": 147.585, + "args": { + "External id": 973643,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936323758.043, "dur": 4.385, + "args": { + "External id": 973644,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345936323768.965, "dur": 15.109, + "args": { + "External id": 973645,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345936323777.327, "dur": 6.220, + "args": { + "External id": 973646,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936323781.189, "dur": 0.771, + "args": { + "External id": 973647,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936323791.427, "dur": 37.512, + "args": { + "External id": 973648,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936323795.328, "dur": 2.720, + "args": { + "External id": 973649,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936323799.223, "dur": 0.311, + "args": { + "External id": 973650,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936323800.623, "dur": 0.524, + "args": { + "External id": 973651,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936323806.436, "dur": 2.603, + "args": { + "External id": 973652,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936323809.900, "dur": 0.430, + "args": { + "External id": 973653,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936323811.220, "dur": 0.474, + "args": { + "External id": 973654,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936323815.384, "dur": 0.556, + "args": { + "External id": 973655,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936323816.801, "dur": 0.675, + "args": { + "External id": 973656,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936323820.388, "dur": 3.433, + "args": { + "External id": 973657,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936323841.466, "dur": 37.507, + "args": { + "External id": 973658,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345936323951.573, "dur": 487.249, + "args": { + "External id": 973659,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936323989.921, "dur": 441.877, + "args": { + "External id": 973660,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12704, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345936324001.825, "dur": 422.345, + "args": { + "External id": 973661,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936324469.755, "dur": 3.046, + "args": { + "External id": 973662,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12706, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.8)", "pid": 2338710, "tid": 2338710, + "ts": 6345936324583.049, "dur": 31606.437, + "args": { + "External id": 973663,"Record function id": 0, "Ev Idx": 12707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936324696.115, "dur": 7.685, + "args": { + "External id": 973664,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936324707.909, "dur": 0.972, + "args": { + "External id": 973665,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936324710.869, "dur": 4.183, + "args": { + "External id": 973666,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936324717.463, "dur": 0.996, + "args": { + "External id": 973667,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936324720.117, "dur": 1.130, + "args": { + "External id": 973668,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936324722.889, "dur": 1.026, + "args": { + "External id": 973669,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936324729.209, "dur": 1.334, + "args": { + "External id": 973670,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936324732.222, "dur": 2.376, + "args": { + "External id": 973671,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936324736.082, "dur": 0.708, + "args": { + "External id": 973672,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936324738.165, "dur": 0.644, + "args": { + "External id": 973673,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936324761.694, "dur": 31367.107, + "args": { + "External id": 973674,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936324781.508, "dur": 31336.321, + "args": { + "External id": 973675,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936324802.878, "dur": 19.954, + "args": { + "External id": 973676,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345936324827.355, "dur": 31219.303, + "args": { + "External id": 973677,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936324830.297, "dur": 31215.274, + "args": { + "External id": 973678,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936324837.355, "dur": 5.584, + "args": { + "External id": 973679,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936324844.989, "dur": 31195.594, + "args": { + "External id": 973680,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936356370.038, "dur": 41.406, + "args": { + "External id": 973681,"Sequence number": 10552259, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12725 + } + }, + { + "ph": "s", "id": 207, "pid": 2338710, "tid": 2338710, "ts": 6345936356370.038, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345936356390.705, "dur": 13.881, + "args": { + "External id": 973682,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936356397.794, "dur": 6.550, + "args": { + "External id": 973683,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345936356497.243, "dur": 89.244, + "args": { + "External id": 973684,"Record function id": 0, "Ev Idx": 12728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345936356587.927, "dur": 1340.358, + "args": { + "External id": 973685,"Record function id": 0, "Ev Idx": 12729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936356636.321, "dur": 1276.256, + "args": { + "External id": 973686,"Sequence number": 10552260, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12730 + } + }, + { + "ph": "s", "id": 206, "pid": 2338710, "tid": 2338710, "ts": 6345936356636.321, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936356730.454, "dur": 56.688, + "args": { + "External id": 973687,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936356802.620, "dur": 120.709, + "args": { + "External id": 973688,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936356941.258, "dur": 45.578, + "args": { + "External id": 973689,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936356994.226, "dur": 55.226, + "args": { + "External id": 973690,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936357133.700, "dur": 36.616, + "args": { + "External id": 973691,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345936357195.211, "dur": 21.976, + "args": { + "External id": 973692,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345936357245.268, "dur": 163.757, + "args": { + "External id": 973693,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936357307.456, "dur": 16.020, + "args": { + "External id": 973694,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936357314.518, "dur": 7.669, + "args": { + "External id": 973695,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936357328.970, "dur": 5.999, + "args": { + "External id": 973696,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936357336.319, "dur": 3.022, + "args": { + "External id": 973697,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936357341.920, "dur": 5.448, + "args": { + "External id": 973698,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936357423.746, "dur": 69.942, + "args": { + "External id": 973699,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345936357530.556, "dur": 35.166, + "args": { + "External id": 973700,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936357577.614, "dur": 52.152, + "args": { + "External id": 973701,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936357638.447, "dur": 41.066, + "args": { + "External id": 973702,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936357708.296, "dur": 28.428, + "args": { + "External id": 973703,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936357742.975, "dur": 42.762, + "args": { + "External id": 973704,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936357805.738, "dur": 26.914, + "args": { + "External id": 973705,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12749 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.8)", "pid": 2338710, "tid": 2338710, + "ts": 6345936358003.577, "dur": 155.997, + "args": { + "External id": 973706,"Record function id": 0, "Ev Idx": 12750 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345936358257.623, "dur": 58.230, + "args": { + "External id": 973707,"Record function id": 0, "Ev Idx": 12751 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.9)", "pid": 2338710, "tid": 2338710, + "ts": 6345936358326.585, "dur": 31165.038, + "args": { + "External id": 973708,"Record function id": 0, "Ev Idx": 12752 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.9)", "pid": 2338710, "tid": 2338710, + "ts": 6345936358339.114, "dur": 1157.521, + "args": { + "External id": 973709,"Record function id": 0, "Ev Idx": 12753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936358432.973, "dur": 12.287, + "args": { + "External id": 973710,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936358463.108, "dur": 44.220, + "args": { + "External id": 973711,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936358469.242, "dur": 2.815, + "args": { + "External id": 973712,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936358476.537, "dur": 0.511, + "args": { + "External id": 973713,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936358480.667, "dur": 0.348, + "args": { + "External id": 973714,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936358481.999, "dur": 0.679, + "args": { + "External id": 973715,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936358485.676, "dur": 0.770, + "args": { + "External id": 973716,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936358489.517, "dur": 0.523, + "args": { + "External id": 973717,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936358491.018, "dur": 5.392, + "args": { + "External id": 973718,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936358499.042, "dur": 0.597, + "args": { + "External id": 973719,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936358500.366, "dur": 0.507, + "args": { + "External id": 973720,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936358518.795, "dur": 69.326, + "args": { + "External id": 973721,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345936358632.120, "dur": 163.071, + "args": { + "External id": 973722,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936358649.019, "dur": 4.714, + "args": { + "External id": 973723,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345936358659.710, "dur": 14.479, + "args": { + "External id": 973724,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345936358667.754, "dur": 5.912, + "args": { + "External id": 973725,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936358671.548, "dur": 0.671, + "args": { + "External id": 973726,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936358681.748, "dur": 36.799, + "args": { + "External id": 973727,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936358685.320, "dur": 2.626, + "args": { + "External id": 973728,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936358689.140, "dur": 0.290, + "args": { + "External id": 973729,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936358690.274, "dur": 0.579, + "args": { + "External id": 973730,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936358695.854, "dur": 2.514, + "args": { + "External id": 973731,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936358703.249, "dur": 0.538, + "args": { + "External id": 973732,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936358706.212, "dur": 0.547, + "args": { + "External id": 973733,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936358709.473, "dur": 0.547, + "args": { + "External id": 973734,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936358710.763, "dur": 0.456, + "args": { + "External id": 973735,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936358712.561, "dur": 2.038, + "args": { + "External id": 973736,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936358741.242, "dur": 44.219, + "args": { + "External id": 973737,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345936358858.874, "dur": 515.033, + "args": { + "External id": 973738,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936358896.762, "dur": 469.847, + "args": { + "External id": 973739,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12783, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345936358911.017, "dur": 448.096, + "args": { + "External id": 973740,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936359406.138, "dur": 3.259, + "args": { + "External id": 973741,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12785, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.9)", "pid": 2338710, "tid": 2338710, + "ts": 6345936359520.593, "dur": 29714.401, + "args": { + "External id": 973742,"Record function id": 0, "Ev Idx": 12786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936359632.304, "dur": 7.429, + "args": { + "External id": 973743,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936359643.888, "dur": 1.241, + "args": { + "External id": 973744,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936359648.141, "dur": 3.262, + "args": { + "External id": 973745,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936359653.625, "dur": 0.953, + "args": { + "External id": 973746,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936359656.697, "dur": 1.225, + "args": { + "External id": 973747,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936359659.675, "dur": 0.974, + "args": { + "External id": 973748,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936359664.625, "dur": 0.972, + "args": { + "External id": 973749,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936359667.531, "dur": 2.455, + "args": { + "External id": 973750,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936359671.539, "dur": 0.742, + "args": { + "External id": 973751,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936359674.063, "dur": 1.020, + "args": { + "External id": 973752,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936359698.449, "dur": 29477.334, + "args": { + "External id": 973753,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936359717.466, "dur": 29447.697, + "args": { + "External id": 973754,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936359736.206, "dur": 20.186, + "args": { + "External id": 973755,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345936359760.630, "dur": 29356.596, + "args": { + "External id": 973756,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936359764.597, "dur": 29350.822, + "args": { + "External id": 973757,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936359771.757, "dur": 7.168, + "args": { + "External id": 973758,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936359781.489, "dur": 29330.208, + "args": { + "External id": 973759,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936389419.570, "dur": 40.144, + "args": { + "External id": 973760,"Sequence number": 10552261, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12804 + } + }, + { + "ph": "s", "id": 205, "pid": 2338710, "tid": 2338710, "ts": 6345936389419.570, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345936389441.474, "dur": 11.747, + "args": { + "External id": 973761,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936389447.186, "dur": 5.862, + "args": { + "External id": 973762,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345936389544.988, "dur": 89.407, + "args": { + "External id": 973763,"Record function id": 0, "Ev Idx": 12807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345936389636.981, "dur": 1355.691, + "args": { + "External id": 973764,"Record function id": 0, "Ev Idx": 12808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936389685.777, "dur": 1289.673, + "args": { + "External id": 973765,"Sequence number": 10552262, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12809 + } + }, + { + "ph": "s", "id": 204, "pid": 2338710, "tid": 2338710, "ts": 6345936389685.777, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936389769.140, "dur": 55.812, + "args": { + "External id": 973766,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936389841.920, "dur": 121.896, + "args": { + "External id": 973767,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936389981.603, "dur": 65.908, + "args": { + "External id": 973768,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936390104.415, "dur": 46.095, + "args": { + "External id": 973769,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936390189.600, "dur": 36.548, + "args": { + "External id": 973770,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345936390249.849, "dur": 23.578, + "args": { + "External id": 973771,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345936390301.189, "dur": 161.251, + "args": { + "External id": 973772,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936390361.546, "dur": 16.150, + "args": { + "External id": 973773,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936390369.337, "dur": 7.347, + "args": { + "External id": 973774,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936390382.257, "dur": 5.663, + "args": { + "External id": 973775,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936390389.556, "dur": 1.556, + "args": { + "External id": 973776,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936390394.239, "dur": 5.551, + "args": { + "External id": 973777,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936390475.857, "dur": 64.508, + "args": { + "External id": 973778,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345936390578.788, "dur": 37.614, + "args": { + "External id": 973779,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936390627.844, "dur": 52.401, + "args": { + "External id": 973780,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936390690.505, "dur": 40.862, + "args": { + "External id": 973781,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936390759.396, "dur": 31.276, + "args": { + "External id": 973782,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936390799.813, "dur": 43.446, + "args": { + "External id": 973783,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936390863.999, "dur": 23.494, + "args": { + "External id": 973784,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12828 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.9)", "pid": 2338710, "tid": 2338710, + "ts": 6345936391135.082, "dur": 93.433, + "args": { + "External id": 973785,"Record function id": 0, "Ev Idx": 12829 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345936391322.388, "dur": 59.252, + "args": { + "External id": 973786,"Record function id": 0, "Ev Idx": 12830 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.10)", "pid": 2338710, "tid": 2338710, + "ts": 6345936391392.494, "dur": 31122.618, + "args": { + "External id": 973787,"Record function id": 0, "Ev Idx": 12831 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.10)", "pid": 2338710, "tid": 2338710, + "ts": 6345936391400.590, "dur": 1186.179, + "args": { + "External id": 973788,"Record function id": 0, "Ev Idx": 12832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936391496.030, "dur": 13.457, + "args": { + "External id": 973789,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936391526.127, "dur": 48.438, + "args": { + "External id": 973790,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936391536.363, "dur": 2.735, + "args": { + "External id": 973791,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936391543.802, "dur": 1.034, + "args": { + "External id": 973792,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936391547.342, "dur": 0.479, + "args": { + "External id": 973793,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936391549.377, "dur": 2.765, + "args": { + "External id": 973794,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936391554.169, "dur": 0.545, + "args": { + "External id": 973795,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936391556.418, "dur": 0.385, + "args": { + "External id": 973796,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936391560.635, "dur": 3.071, + "args": { + "External id": 973797,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936391565.330, "dur": 0.551, + "args": { + "External id": 973798,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936391567.492, "dur": 0.380, + "args": { + "External id": 973799,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936391586.821, "dur": 71.121, + "args": { + "External id": 973800,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345936391699.506, "dur": 143.489, + "args": { + "External id": 973801,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936391714.949, "dur": 4.450, + "args": { + "External id": 973802,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345936391725.481, "dur": 17.785, + "args": { + "External id": 973803,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345936391734.044, "dur": 8.682, + "args": { + "External id": 973804,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936391738.382, "dur": 2.782, + "args": { + "External id": 973805,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936391751.660, "dur": 35.269, + "args": { + "External id": 973806,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936391754.812, "dur": 0.621, + "args": { + "External id": 973807,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936391757.013, "dur": 0.564, + "args": { + "External id": 973808,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936391759.359, "dur": 0.463, + "args": { + "External id": 973809,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936391763.144, "dur": 2.877, + "args": { + "External id": 973810,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936391767.861, "dur": 0.449, + "args": { + "External id": 973811,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936391770.000, "dur": 2.872, + "args": { + "External id": 973812,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936391775.084, "dur": 0.465, + "args": { + "External id": 973813,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936391777.075, "dur": 0.773, + "args": { + "External id": 973814,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936391781.723, "dur": 0.332, + "args": { + "External id": 973815,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936391798.848, "dur": 35.005, + "args": { + "External id": 973816,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345936391906.766, "dur": 555.376, + "args": { + "External id": 973817,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936391945.251, "dur": 509.741, + "args": { + "External id": 973818,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12862, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345936391957.793, "dur": 489.666, + "args": { + "External id": 973819,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936392495.164, "dur": 2.898, + "args": { + "External id": 973820,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12864, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.10)", "pid": 2338710, "tid": 2338710, + "ts": 6345936392610.992, "dur": 29637.271, + "args": { + "External id": 973821,"Record function id": 0, "Ev Idx": 12865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936392726.975, "dur": 7.817, + "args": { + "External id": 973822,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936392740.208, "dur": 1.188, + "args": { + "External id": 973823,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936392743.571, "dur": 3.918, + "args": { + "External id": 973824,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936392794.268, "dur": 0.932, + "args": { + "External id": 973825,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936392799.262, "dur": 0.927, + "args": { + "External id": 973826,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936392802.418, "dur": 0.778, + "args": { + "External id": 973827,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936392807.336, "dur": 0.890, + "args": { + "External id": 973828,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936392809.866, "dur": 2.075, + "args": { + "External id": 973829,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936392813.618, "dur": 0.773, + "args": { + "External id": 973830,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936392815.922, "dur": 0.788, + "args": { + "External id": 973831,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936392842.019, "dur": 29345.415, + "args": { + "External id": 973832,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936392862.434, "dur": 29313.777, + "args": { + "External id": 973833,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936392880.082, "dur": 19.055, + "args": { + "External id": 973834,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345936392903.411, "dur": 29227.508, + "args": { + "External id": 973835,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936392906.308, "dur": 29223.690, + "args": { + "External id": 973836,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936392912.548, "dur": 7.429, + "args": { + "External id": 973837,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936392921.840, "dur": 29202.971, + "args": { + "External id": 973838,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936422439.172, "dur": 40.910, + "args": { + "External id": 973839,"Sequence number": 10552263, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12883 + } + }, + { + "ph": "s", "id": 203, "pid": 2338710, "tid": 2338710, "ts": 6345936422439.172, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345936422460.536, "dur": 12.743, + "args": { + "External id": 973840,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936422466.307, "dur": 6.721, + "args": { + "External id": 973841,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345936422572.545, "dur": 91.609, + "args": { + "External id": 973842,"Record function id": 0, "Ev Idx": 12886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345936422666.012, "dur": 1423.155, + "args": { + "External id": 973843,"Record function id": 0, "Ev Idx": 12887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936422714.336, "dur": 1317.971, + "args": { + "External id": 973844,"Sequence number": 10552264, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12888 + } + }, + { + "ph": "s", "id": 202, "pid": 2338710, "tid": 2338710, "ts": 6345936422714.336, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936422804.994, "dur": 57.613, + "args": { + "External id": 973845,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936422878.038, "dur": 122.614, + "args": { + "External id": 973846,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936423043.206, "dur": 97.710, + "args": { + "External id": 973847,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936423158.077, "dur": 39.077, + "args": { + "External id": 973848,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936423233.320, "dur": 33.900, + "args": { + "External id": 973849,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345936423291.103, "dur": 21.865, + "args": { + "External id": 973850,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345936423342.235, "dur": 162.285, + "args": { + "External id": 973851,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936423404.584, "dur": 15.932, + "args": { + "External id": 973852,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936423412.624, "dur": 7.015, + "args": { + "External id": 973853,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936423424.776, "dur": 5.982, + "args": { + "External id": 973854,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936423432.576, "dur": 1.213, + "args": { + "External id": 973855,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936423438.238, "dur": 5.121, + "args": { + "External id": 973856,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936423518.092, "dur": 62.513, + "args": { + "External id": 973857,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345936423618.979, "dur": 35.733, + "args": { + "External id": 973858,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936423665.543, "dur": 52.110, + "args": { + "External id": 973859,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936423726.677, "dur": 42.573, + "args": { + "External id": 973860,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936423799.284, "dur": 30.886, + "args": { + "External id": 973861,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936423838.693, "dur": 44.246, + "args": { + "External id": 973862,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936423902.760, "dur": 23.330, + "args": { + "External id": 973863,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12907 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.10)", "pid": 2338710, "tid": 2338710, + "ts": 6345936424175.019, "dur": 90.948, + "args": { + "External id": 973864,"Record function id": 0, "Ev Idx": 12908 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345936424356.359, "dur": 55.699, + "args": { + "External id": 973865,"Record function id": 0, "Ev Idx": 12909 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.11)", "pid": 2338710, "tid": 2338710, + "ts": 6345936424422.049, "dur": 31238.401, + "args": { + "External id": 973866,"Record function id": 0, "Ev Idx": 12910 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.11)", "pid": 2338710, "tid": 2338710, + "ts": 6345936424431.305, "dur": 1146.419, + "args": { + "External id": 973867,"Record function id": 0, "Ev Idx": 12911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936424526.617, "dur": 12.184, + "args": { + "External id": 973868,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936424555.661, "dur": 47.059, + "args": { + "External id": 973869,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936424561.850, "dur": 2.893, + "args": { + "External id": 973870,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936424570.115, "dur": 0.590, + "args": { + "External id": 973871,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936424572.766, "dur": 0.642, + "args": { + "External id": 973872,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936424575.442, "dur": 0.612, + "args": { + "External id": 973873,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936424581.005, "dur": 0.779, + "args": { + "External id": 973874,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936424584.523, "dur": 0.773, + "args": { + "External id": 973875,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936424587.455, "dur": 3.216, + "args": { + "External id": 973876,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936424592.612, "dur": 0.591, + "args": { + "External id": 973877,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936424594.870, "dur": 0.727, + "args": { + "External id": 973878,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936424620.529, "dur": 70.029, + "args": { + "External id": 973879,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345936424734.818, "dur": 148.468, + "args": { + "External id": 973880,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936424750.646, "dur": 6.539, + "args": { + "External id": 973881,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345936424763.802, "dur": 16.016, + "args": { + "External id": 973882,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345936424772.162, "dur": 7.145, + "args": { + "External id": 973883,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936424776.757, "dur": 1.018, + "args": { + "External id": 973884,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936424787.646, "dur": 36.123, + "args": { + "External id": 973885,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936424790.154, "dur": 0.685, + "args": { + "External id": 973886,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936424792.779, "dur": 2.313, + "args": { + "External id": 973887,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936424796.827, "dur": 0.522, + "args": { + "External id": 973888,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936424799.269, "dur": 2.665, + "args": { + "External id": 973889,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936424806.366, "dur": 0.324, + "args": { + "External id": 973890,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936424808.445, "dur": 0.548, + "args": { + "External id": 973891,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936424811.265, "dur": 0.591, + "args": { + "External id": 973892,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936424815.681, "dur": 0.595, + "args": { + "External id": 973893,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936424818.256, "dur": 0.681, + "args": { + "External id": 973894,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936424837.916, "dur": 37.060, + "args": { + "External id": 973895,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345936424947.397, "dur": 506.274, + "args": { + "External id": 973896,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936424987.624, "dur": 459.140, + "args": { + "External id": 973897,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12941, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345936425000.672, "dur": 438.732, + "args": { + "External id": 973898,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936425486.002, "dur": 2.728, + "args": { + "External id": 973899,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12943, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.11)", "pid": 2338710, "tid": 2338710, + "ts": 6345936425601.506, "dur": 29791.726, + "args": { + "External id": 973900,"Record function id": 0, "Ev Idx": 12944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936425718.919, "dur": 7.449, + "args": { + "External id": 973901,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936425731.487, "dur": 1.328, + "args": { + "External id": 973902,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936425735.080, "dur": 3.204, + "args": { + "External id": 973903,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936425740.935, "dur": 1.304, + "args": { + "External id": 973904,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936425744.065, "dur": 0.901, + "args": { + "External id": 973905,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936425746.818, "dur": 0.920, + "args": { + "External id": 973906,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936425751.741, "dur": 0.811, + "args": { + "External id": 973907,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936425754.390, "dur": 2.380, + "args": { + "External id": 973908,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936425758.538, "dur": 0.768, + "args": { + "External id": 973909,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936425761.629, "dur": 0.863, + "args": { + "External id": 973910,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936425785.749, "dur": 29550.460, + "args": { + "External id": 973911,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936425805.181, "dur": 29520.184, + "args": { + "External id": 973912,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936425827.572, "dur": 20.823, + "args": { + "External id": 973913,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345936425854.239, "dur": 29425.357, + "args": { + "External id": 973914,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936425857.459, "dur": 29420.376, + "args": { + "External id": 973915,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936425864.538, "dur": 7.065, + "args": { + "External id": 973916,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936425873.751, "dur": 29400.501, + "args": { + "External id": 973917,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936455581.781, "dur": 45.159, + "args": { + "External id": 973918,"Sequence number": 10552265, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12962 + } + }, + { + "ph": "s", "id": 201, "pid": 2338710, "tid": 2338710, "ts": 6345936455581.781, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345936455607.708, "dur": 12.275, + "args": { + "External id": 973919,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936455613.185, "dur": 6.521, + "args": { + "External id": 973920,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345936455713.370, "dur": 85.201, + "args": { + "External id": 973921,"Record function id": 0, "Ev Idx": 12965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345936455800.270, "dur": 1395.043, + "args": { + "External id": 973922,"Record function id": 0, "Ev Idx": 12966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936455848.630, "dur": 1328.058, + "args": { + "External id": 973923,"Sequence number": 10552266, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12967 + } + }, + { + "ph": "s", "id": 200, "pid": 2338710, "tid": 2338710, "ts": 6345936455848.630, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936455937.128, "dur": 58.933, + "args": { + "External id": 973924,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936456027.439, "dur": 158.386, + "args": { + "External id": 973925,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936456207.834, "dur": 47.411, + "args": { + "External id": 973926,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936456265.129, "dur": 34.763, + "args": { + "External id": 973927,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936456335.201, "dur": 33.071, + "args": { + "External id": 973928,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345936456391.834, "dur": 21.010, + "args": { + "External id": 973929,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345936456441.841, "dur": 163.099, + "args": { + "External id": 973930,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936456504.358, "dur": 15.712, + "args": { + "External id": 973931,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936456511.908, "dur": 7.171, + "args": { + "External id": 973932,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936456524.795, "dur": 4.542, + "args": { + "External id": 973933,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936456531.040, "dur": 1.240, + "args": { + "External id": 973934,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936456534.939, "dur": 6.697, + "args": { + "External id": 973935,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936456618.327, "dur": 60.726, + "args": { + "External id": 973936,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345936456714.720, "dur": 35.807, + "args": { + "External id": 973937,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936456762.849, "dur": 53.365, + "args": { + "External id": 973938,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936456825.788, "dur": 40.849, + "args": { + "External id": 973939,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936456891.803, "dur": 33.458, + "args": { + "External id": 973940,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936456934.205, "dur": 46.154, + "args": { + "External id": 973941,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936457003.106, "dur": 43.961, + "args": { + "External id": 973942,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12986 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.11)", "pid": 2338710, "tid": 2338710, + "ts": 6345936457271.797, "dur": 91.271, + "args": { + "External id": 973943,"Record function id": 0, "Ev Idx": 12987 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345936457450.575, "dur": 57.077, + "args": { + "External id": 973944,"Record function id": 0, "Ev Idx": 12988 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.12)", "pid": 2338710, "tid": 2338710, + "ts": 6345936457518.062, "dur": 31186.341, + "args": { + "External id": 973945,"Record function id": 0, "Ev Idx": 12989 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.12)", "pid": 2338710, "tid": 2338710, + "ts": 6345936457527.081, "dur": 1101.891, + "args": { + "External id": 973946,"Record function id": 0, "Ev Idx": 12990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936457616.236, "dur": 12.023, + "args": { + "External id": 973947,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936457644.951, "dur": 47.865, + "args": { + "External id": 973948,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936457651.255, "dur": 2.661, + "args": { + "External id": 973949,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936457659.216, "dur": 0.353, + "args": { + "External id": 973950,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936457661.488, "dur": 0.437, + "args": { + "External id": 973951,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936457663.852, "dur": 0.591, + "args": { + "External id": 973952,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936457668.115, "dur": 0.341, + "args": { + "External id": 973953,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936457670.272, "dur": 0.420, + "args": { + "External id": 973954,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936457672.214, "dur": 4.809, + "args": { + "External id": 973955,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936457683.607, "dur": 0.316, + "args": { + "External id": 973956,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936457685.340, "dur": 0.256, + "args": { + "External id": 973957,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936457705.093, "dur": 67.395, + "args": { + "External id": 973958,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345936457815.812, "dur": 148.554, + "args": { + "External id": 973959,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936457832.410, "dur": 4.429, + "args": { + "External id": 973960,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345936457842.908, "dur": 17.591, + "args": { + "External id": 973961,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345936457851.283, "dur": 8.667, + "args": { + "External id": 973962,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936457855.667, "dur": 2.696, + "args": { + "External id": 973963,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936457868.563, "dur": 33.455, + "args": { + "External id": 973964,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936457871.061, "dur": 0.675, + "args": { + "External id": 973965,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936457873.337, "dur": 0.806, + "args": { + "External id": 973966,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936457875.812, "dur": 0.391, + "args": { + "External id": 973967,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936457879.855, "dur": 3.022, + "args": { + "External id": 973968,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936457884.467, "dur": 0.543, + "args": { + "External id": 973969,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936457886.743, "dur": 3.061, + "args": { + "External id": 973970,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936457890.868, "dur": 0.569, + "args": { + "External id": 973971,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936457893.223, "dur": 0.605, + "args": { + "External id": 973972,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936457897.364, "dur": 0.494, + "args": { + "External id": 973973,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936457914.097, "dur": 40.853, + "args": { + "External id": 973974,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345936458050.642, "dur": 463.621, + "args": { + "External id": 973975,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936458128.782, "dur": 379.115, + "args": { + "External id": 973976,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13020, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345936458142.303, "dur": 356.875, + "args": { + "External id": 973977,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936458541.218, "dur": 2.711, + "args": { + "External id": 973978,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13022, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.12)", "pid": 2338710, "tid": 2338710, + "ts": 6345936458652.680, "dur": 29802.526, + "args": { + "External id": 973979,"Record function id": 0, "Ev Idx": 13023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936458766.317, "dur": 8.089, + "args": { + "External id": 973980,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936458779.328, "dur": 1.664, + "args": { + "External id": 973981,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936458783.237, "dur": 3.666, + "args": { + "External id": 973982,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936458789.335, "dur": 0.765, + "args": { + "External id": 973983,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936458792.236, "dur": 0.811, + "args": { + "External id": 973984,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936458794.873, "dur": 0.874, + "args": { + "External id": 973985,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936458799.816, "dur": 1.088, + "args": { + "External id": 973986,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936458802.848, "dur": 2.329, + "args": { + "External id": 973987,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936458806.890, "dur": 1.435, + "args": { + "External id": 973988,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936458810.019, "dur": 0.823, + "args": { + "External id": 973989,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936458833.934, "dur": 29564.755, + "args": { + "External id": 973990,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936458852.288, "dur": 29535.252, + "args": { + "External id": 973991,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936458875.060, "dur": 20.789, + "args": { + "External id": 973992,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345936458900.160, "dur": 29438.904, + "args": { + "External id": 973993,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936458904.530, "dur": 29433.173, + "args": { + "External id": 973994,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936458911.734, "dur": 7.008, + "args": { + "External id": 973995,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936458920.871, "dur": 29412.556, + "args": { + "External id": 973996,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936488633.560, "dur": 39.845, + "args": { + "External id": 973997,"Sequence number": 10552267, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13041 + } + }, + { + "ph": "s", "id": 199, "pid": 2338710, "tid": 2338710, "ts": 6345936488633.560, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345936488654.720, "dur": 11.960, + "args": { + "External id": 973998,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936488660.243, "dur": 6.195, + "args": { + "External id": 973999,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345936488756.432, "dur": 90.912, + "args": { + "External id": 974000,"Record function id": 0, "Ev Idx": 13044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345936488850.263, "dur": 1425.470, + "args": { + "External id": 974001,"Record function id": 0, "Ev Idx": 13045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936488898.238, "dur": 1359.544, + "args": { + "External id": 974002,"Sequence number": 10552268, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13046 + } + }, + { + "ph": "s", "id": 198, "pid": 2338710, "tid": 2338710, "ts": 6345936488898.238, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936488986.645, "dur": 116.153, + "args": { + "External id": 974003,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936489125.627, "dur": 119.877, + "args": { + "External id": 974004,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936489263.579, "dur": 47.986, + "args": { + "External id": 974005,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936489321.126, "dur": 34.665, + "args": { + "External id": 974006,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936489391.213, "dur": 33.427, + "args": { + "External id": 974007,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345936489449.047, "dur": 21.294, + "args": { + "External id": 974008,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345936489498.674, "dur": 162.480, + "args": { + "External id": 974009,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936489562.184, "dur": 15.089, + "args": { + "External id": 974010,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936489569.560, "dur": 6.854, + "args": { + "External id": 974011,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936489581.784, "dur": 5.291, + "args": { + "External id": 974012,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936489588.770, "dur": 1.157, + "args": { + "External id": 974013,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936489593.188, "dur": 5.766, + "args": { + "External id": 974014,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936489675.827, "dur": 61.641, + "args": { + "External id": 974015,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345936489776.498, "dur": 35.756, + "args": { + "External id": 974016,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936489823.129, "dur": 52.136, + "args": { + "External id": 974017,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936489885.250, "dur": 42.364, + "args": { + "External id": 974018,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936489967.180, "dur": 33.659, + "args": { + "External id": 974019,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936490030.168, "dur": 93.377, + "args": { + "External id": 974020,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936490150.137, "dur": 27.048, + "args": { + "External id": 974021,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13065 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.12)", "pid": 2338710, "tid": 2338710, + "ts": 6345936490351.994, "dur": 93.969, + "args": { + "External id": 974022,"Record function id": 0, "Ev Idx": 13066 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345936490534.571, "dur": 54.829, + "args": { + "External id": 974023,"Record function id": 0, "Ev Idx": 13067 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.13)", "pid": 2338710, "tid": 2338710, + "ts": 6345936490599.794, "dur": 30233.868, + "args": { + "External id": 974024,"Record function id": 0, "Ev Idx": 13068 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.13)", "pid": 2338710, "tid": 2338710, + "ts": 6345936490609.071, "dur": 1091.825, + "args": { + "External id": 974025,"Record function id": 0, "Ev Idx": 13069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936490703.917, "dur": 11.462, + "args": { + "External id": 974026,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936490731.923, "dur": 41.403, + "args": { + "External id": 974027,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936490738.011, "dur": 2.579, + "args": { + "External id": 974028,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936490745.557, "dur": 0.648, + "args": { + "External id": 974029,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936490747.824, "dur": 1.031, + "args": { + "External id": 974030,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936490750.348, "dur": 0.406, + "args": { + "External id": 974031,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936490754.577, "dur": 0.557, + "args": { + "External id": 974032,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936490756.427, "dur": 0.644, + "args": { + "External id": 974033,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936490758.330, "dur": 3.340, + "args": { + "External id": 974034,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936490762.857, "dur": 0.678, + "args": { + "External id": 974035,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936490765.052, "dur": 0.724, + "args": { + "External id": 974036,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936490785.378, "dur": 62.347, + "args": { + "External id": 974037,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345936490894.269, "dur": 212.596, + "args": { + "External id": 974038,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936490910.911, "dur": 4.467, + "args": { + "External id": 974039,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345936490921.218, "dur": 12.512, + "args": { + "External id": 974040,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345936490926.784, "dur": 6.426, + "args": { + "External id": 974041,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936490931.210, "dur": 0.617, + "args": { + "External id": 974042,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936490941.034, "dur": 33.571, + "args": { + "External id": 974043,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936490943.082, "dur": 2.783, + "args": { + "External id": 974044,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936490947.351, "dur": 0.590, + "args": { + "External id": 974045,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936490949.427, "dur": 0.837, + "args": { + "External id": 974046,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936490954.118, "dur": 2.756, + "args": { + "External id": 974047,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936490958.665, "dur": 0.423, + "args": { + "External id": 974048,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936490960.315, "dur": 0.446, + "args": { + "External id": 974049,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936490963.924, "dur": 0.699, + "args": { + "External id": 974050,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936490966.184, "dur": 0.680, + "args": { + "External id": 974051,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936490968.019, "dur": 2.201, + "args": { + "External id": 974052,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936490988.757, "dur": 104.643, + "args": { + "External id": 974053,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345936491177.086, "dur": 416.267, + "args": { + "External id": 974054,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936491216.413, "dur": 371.532, + "args": { + "External id": 974055,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13099, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345936491228.796, "dur": 353.288, + "args": { + "External id": 974056,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936491618.645, "dur": 2.964, + "args": { + "External id": 974057,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13101, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.13)", "pid": 2338710, "tid": 2338710, + "ts": 6345936491724.379, "dur": 28859.943, + "args": { + "External id": 974058,"Record function id": 0, "Ev Idx": 13102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936491834.621, "dur": 7.308, + "args": { + "External id": 974059,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936491846.214, "dur": 1.140, + "args": { + "External id": 974060,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936491849.440, "dur": 4.141, + "args": { + "External id": 974061,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936491855.568, "dur": 1.180, + "args": { + "External id": 974062,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936491858.328, "dur": 0.845, + "args": { + "External id": 974063,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936491861.112, "dur": 1.142, + "args": { + "External id": 974064,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936491866.232, "dur": 1.234, + "args": { + "External id": 974065,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936491868.949, "dur": 2.312, + "args": { + "External id": 974066,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936491873.323, "dur": 1.113, + "args": { + "External id": 974067,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936491876.166, "dur": 0.946, + "args": { + "External id": 974068,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936491900.411, "dur": 28628.715, + "args": { + "External id": 974069,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936491919.309, "dur": 28599.221, + "args": { + "External id": 974070,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936491939.919, "dur": 20.902, + "args": { + "External id": 974071,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345936491966.186, "dur": 28508.039, + "args": { + "External id": 974072,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936491969.338, "dur": 28503.178, + "args": { + "External id": 974073,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936491976.028, "dur": 5.702, + "args": { + "External id": 974074,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936491983.920, "dur": 28484.437, + "args": { + "External id": 974075,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936520762.619, "dur": 39.226, + "args": { + "External id": 974076,"Sequence number": 10552269, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13120 + } + }, + { + "ph": "s", "id": 197, "pid": 2338710, "tid": 2338710, "ts": 6345936520762.619, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345936520783.306, "dur": 12.168, + "args": { + "External id": 974077,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936520789.075, "dur": 6.150, + "args": { + "External id": 974078,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345936520884.976, "dur": 88.178, + "args": { + "External id": 974079,"Record function id": 0, "Ev Idx": 13123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345936520974.826, "dur": 1380.667, + "args": { + "External id": 974080,"Record function id": 0, "Ev Idx": 13124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936521039.470, "dur": 1297.958, + "args": { + "External id": 974081,"Sequence number": 10552270, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13125 + } + }, + { + "ph": "s", "id": 196, "pid": 2338710, "tid": 2338710, "ts": 6345936521039.470, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936521163.107, "dur": 61.326, + "args": { + "External id": 974082,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936521240.957, "dur": 121.911, + "args": { + "External id": 974083,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936521379.270, "dur": 42.516, + "args": { + "External id": 974084,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936521433.457, "dur": 35.427, + "args": { + "External id": 974085,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936521500.857, "dur": 32.541, + "args": { + "External id": 974086,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345936521556.617, "dur": 21.987, + "args": { + "External id": 974087,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345936521606.941, "dur": 162.011, + "args": { + "External id": 974088,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936521668.322, "dur": 15.154, + "args": { + "External id": 974089,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936521675.812, "dur": 6.684, + "args": { + "External id": 974090,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936521688.087, "dur": 5.839, + "args": { + "External id": 974091,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936521695.509, "dur": 1.408, + "args": { + "External id": 974092,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936521699.798, "dur": 5.685, + "args": { + "External id": 974093,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936521782.467, "dur": 57.751, + "args": { + "External id": 974094,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345936521873.566, "dur": 31.987, + "args": { + "External id": 974095,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936521917.207, "dur": 51.525, + "args": { + "External id": 974096,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936521978.518, "dur": 61.566, + "args": { + "External id": 974097,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936522111.153, "dur": 35.234, + "args": { + "External id": 974098,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936522156.485, "dur": 49.552, + "args": { + "External id": 974099,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936522227.264, "dur": 23.877, + "args": { + "External id": 974100,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13144 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.13)", "pid": 2338710, "tid": 2338710, + "ts": 6345936522431.453, "dur": 90.831, + "args": { + "External id": 974101,"Record function id": 0, "Ev Idx": 13145 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345936522611.838, "dur": 55.196, + "args": { + "External id": 974102,"Record function id": 0, "Ev Idx": 13146 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.14)", "pid": 2338710, "tid": 2338710, + "ts": 6345936522677.472, "dur": 31024.318, + "args": { + "External id": 974103,"Record function id": 0, "Ev Idx": 13147 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.14)", "pid": 2338710, "tid": 2338710, + "ts": 6345936522686.267, "dur": 1039.816, + "args": { + "External id": 974104,"Record function id": 0, "Ev Idx": 13148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936522782.965, "dur": 11.310, + "args": { + "External id": 974105,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936522810.915, "dur": 45.318, + "args": { + "External id": 974106,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936522816.749, "dur": 2.709, + "args": { + "External id": 974107,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936522824.418, "dur": 0.627, + "args": { + "External id": 974108,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936522826.366, "dur": 0.842, + "args": { + "External id": 974109,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936522828.760, "dur": 0.714, + "args": { + "External id": 974110,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936522833.068, "dur": 0.492, + "args": { + "External id": 974111,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936522834.868, "dur": 0.334, + "args": { + "External id": 974112,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936522841.097, "dur": 4.415, + "args": { + "External id": 974113,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936522847.060, "dur": 0.535, + "args": { + "External id": 974114,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936522848.982, "dur": 0.377, + "args": { + "External id": 974115,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936522871.252, "dur": 65.931, + "args": { + "External id": 974116,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345936522979.954, "dur": 216.938, + "args": { + "External id": 974117,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936522994.129, "dur": 6.156, + "args": { + "External id": 974118,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345936523030.727, "dur": 15.972, + "args": { + "External id": 974119,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345936523038.314, "dur": 7.720, + "args": { + "External id": 974120,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936523042.974, "dur": 0.742, + "args": { + "External id": 974121,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936523096.875, "dur": 33.769, + "args": { + "External id": 974122,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936523099.664, "dur": 0.802, + "args": { + "External id": 974123,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936523102.303, "dur": 2.686, + "args": { + "External id": 974124,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936523106.398, "dur": 0.655, + "args": { + "External id": 974125,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936523108.513, "dur": 3.065, + "args": { + "External id": 974126,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936523114.973, "dur": 0.402, + "args": { + "External id": 974127,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936523117.151, "dur": 0.557, + "args": { + "External id": 974128,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936523118.888, "dur": 0.540, + "args": { + "External id": 974129,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936523122.705, "dur": 0.564, + "args": { + "External id": 974130,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936523124.385, "dur": 0.583, + "args": { + "External id": 974131,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936523145.073, "dur": 40.890, + "args": { + "External id": 974132,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345936523265.312, "dur": 351.748, + "args": { + "External id": 974133,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936523303.049, "dur": 308.286, + "args": { + "External id": 974134,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13178, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345936523315.074, "dur": 290.382, + "args": { + "External id": 974135,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936523643.329, "dur": 2.335, + "args": { + "External id": 974136,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13180, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.14)", "pid": 2338710, "tid": 2338710, + "ts": 6345936523748.538, "dur": 29698.973, + "args": { + "External id": 974137,"Record function id": 0, "Ev Idx": 13181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936523856.492, "dur": 6.533, + "args": { + "External id": 974138,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936523867.143, "dur": 1.186, + "args": { + "External id": 974139,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936523870.314, "dur": 3.674, + "args": { + "External id": 974140,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936523875.945, "dur": 1.220, + "args": { + "External id": 974141,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936523878.999, "dur": 1.012, + "args": { + "External id": 974142,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936523881.611, "dur": 0.798, + "args": { + "External id": 974143,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936523886.537, "dur": 0.998, + "args": { + "External id": 974144,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936523889.050, "dur": 2.135, + "args": { + "External id": 974145,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936523893.041, "dur": 0.742, + "args": { + "External id": 974146,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936523895.101, "dur": 1.017, + "args": { + "External id": 974147,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936523918.958, "dur": 29469.756, + "args": { + "External id": 974148,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936523937.196, "dur": 29440.722, + "args": { + "External id": 974149,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936523956.165, "dur": 19.377, + "args": { + "External id": 974150,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345936523980.079, "dur": 29352.689, + "args": { + "External id": 974151,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936523983.121, "dur": 29348.858, + "args": { + "External id": 974152,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936523990.658, "dur": 6.505, + "args": { + "External id": 974153,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936523999.410, "dur": 29327.409, + "args": { + "External id": 974154,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936553626.322, "dur": 42.629, + "args": { + "External id": 974155,"Sequence number": 10552271, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13199 + } + }, + { + "ph": "s", "id": 195, "pid": 2338710, "tid": 2338710, "ts": 6345936553626.322, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345936553650.113, "dur": 12.596, + "args": { + "External id": 974156,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936553655.967, "dur": 6.519, + "args": { + "External id": 974157,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345936553757.151, "dur": 90.015, + "args": { + "External id": 974158,"Record function id": 0, "Ev Idx": 13202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345936553848.804, "dur": 1395.578, + "args": { + "External id": 974159,"Record function id": 0, "Ev Idx": 13203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936553897.846, "dur": 1330.489, + "args": { + "External id": 974160,"Sequence number": 10552272, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13204 + } + }, + { + "ph": "s", "id": 194, "pid": 2338710, "tid": 2338710, "ts": 6345936553897.846, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936553990.337, "dur": 115.238, + "args": { + "External id": 974161,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936554128.250, "dur": 116.756, + "args": { + "External id": 974162,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936554260.673, "dur": 46.229, + "args": { + "External id": 974163,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936554318.905, "dur": 34.972, + "args": { + "External id": 974164,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936554387.950, "dur": 34.896, + "args": { + "External id": 974165,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345936554447.151, "dur": 20.948, + "args": { + "External id": 974166,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345936554496.118, "dur": 162.570, + "args": { + "External id": 974167,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936554558.256, "dur": 16.049, + "args": { + "External id": 974168,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936554565.734, "dur": 7.558, + "args": { + "External id": 974169,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936554579.035, "dur": 5.439, + "args": { + "External id": 974170,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936554586.322, "dur": 1.884, + "args": { + "External id": 974171,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936554591.629, "dur": 5.471, + "args": { + "External id": 974172,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936554679.228, "dur": 61.298, + "args": { + "External id": 974173,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345936554779.784, "dur": 33.638, + "args": { + "External id": 974174,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936554827.083, "dur": 49.164, + "args": { + "External id": 974175,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936554886.151, "dur": 40.697, + "args": { + "External id": 974176,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936554956.060, "dur": 28.830, + "args": { + "External id": 974177,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936554991.543, "dur": 98.272, + "args": { + "External id": 974178,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936555123.307, "dur": 24.640, + "args": { + "External id": 974179,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13223 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.14)", "pid": 2338710, "tid": 2338710, + "ts": 6345936555320.671, "dur": 89.104, + "args": { + "External id": 974180,"Record function id": 0, "Ev Idx": 13224 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345936555495.931, "dur": 57.315, + "args": { + "External id": 974181,"Record function id": 0, "Ev Idx": 13225 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.15)", "pid": 2338710, "tid": 2338710, + "ts": 6345936555563.066, "dur": 30192.685, + "args": { + "External id": 974182,"Record function id": 0, "Ev Idx": 13226 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.15)", "pid": 2338710, "tid": 2338710, + "ts": 6345936555573.792, "dur": 1110.243, + "args": { + "External id": 974183,"Record function id": 0, "Ev Idx": 13227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936555669.062, "dur": 12.300, + "args": { + "External id": 974184,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936555697.895, "dur": 45.237, + "args": { + "External id": 974185,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936555704.765, "dur": 2.858, + "args": { + "External id": 974186,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936555712.446, "dur": 0.736, + "args": { + "External id": 974187,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936555715.071, "dur": 0.725, + "args": { + "External id": 974188,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936555717.652, "dur": 0.715, + "args": { + "External id": 974189,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936555721.751, "dur": 0.865, + "args": { + "External id": 974190,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936555724.242, "dur": 0.499, + "args": { + "External id": 974191,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936555726.714, "dur": 4.796, + "args": { + "External id": 974192,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936555733.709, "dur": 0.374, + "args": { + "External id": 974193,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936555736.155, "dur": 0.300, + "args": { + "External id": 974194,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936555755.219, "dur": 60.814, + "args": { + "External id": 974195,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345936555856.487, "dur": 184.974, + "args": { + "External id": 974196,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936555871.491, "dur": 5.154, + "args": { + "External id": 974197,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345936555882.633, "dur": 12.603, + "args": { + "External id": 974198,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345936555888.330, "dur": 6.397, + "args": { + "External id": 974199,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936555892.768, "dur": 0.551, + "args": { + "External id": 974200,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936555903.137, "dur": 35.474, + "args": { + "External id": 974201,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936555905.862, "dur": 2.625, + "args": { + "External id": 974202,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936555910.048, "dur": 0.583, + "args": { + "External id": 974203,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936555912.607, "dur": 0.752, + "args": { + "External id": 974204,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936555917.103, "dur": 2.988, + "args": { + "External id": 974205,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936555921.931, "dur": 0.590, + "args": { + "External id": 974206,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936555923.927, "dur": 0.343, + "args": { + "External id": 974207,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936555927.658, "dur": 0.273, + "args": { + "External id": 974208,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936555929.557, "dur": 0.556, + "args": { + "External id": 974209,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936555931.681, "dur": 2.080, + "args": { + "External id": 974210,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936555962.958, "dur": 65.994, + "args": { + "External id": 974211,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345936556151.278, "dur": 421.353, + "args": { + "External id": 974212,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936556189.821, "dur": 377.090, + "args": { + "External id": 974213,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13257, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345936556204.351, "dur": 355.938, + "args": { + "External id": 974214,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936556599.001, "dur": 2.608, + "args": { + "External id": 974215,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13259, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.15)", "pid": 2338710, "tid": 2338710, + "ts": 6345936556707.450, "dur": 28781.707, + "args": { + "External id": 974216,"Record function id": 0, "Ev Idx": 13260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936556825.824, "dur": 7.790, + "args": { + "External id": 974217,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936556837.907, "dur": 1.175, + "args": { + "External id": 974218,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936556840.912, "dur": 3.637, + "args": { + "External id": 974219,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936556846.670, "dur": 0.997, + "args": { + "External id": 974220,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936556849.269, "dur": 1.135, + "args": { + "External id": 974221,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936556854.372, "dur": 1.265, + "args": { + "External id": 974222,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936556857.551, "dur": 0.802, + "args": { + "External id": 974223,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936556860.281, "dur": 2.240, + "args": { + "External id": 974224,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936556864.287, "dur": 1.077, + "args": { + "External id": 974225,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936556869.478, "dur": 0.898, + "args": { + "External id": 974226,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936556892.023, "dur": 28540.710, + "args": { + "External id": 974227,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936556911.589, "dur": 28510.559, + "args": { + "External id": 974228,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936556932.578, "dur": 21.740, + "args": { + "External id": 974229,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345936556959.768, "dur": 28416.057, + "args": { + "External id": 974230,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936556962.932, "dur": 28412.330, + "args": { + "External id": 974231,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936556969.963, "dur": 6.291, + "args": { + "External id": 974232,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936556978.254, "dur": 28391.849, + "args": { + "External id": 974233,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936585669.828, "dur": 47.496, + "args": { + "External id": 974234,"Sequence number": 10552273, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13278 + } + }, + { + "ph": "s", "id": 193, "pid": 2338710, "tid": 2338710, "ts": 6345936585669.828, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345936585697.899, "dur": 12.574, + "args": { + "External id": 974235,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936585703.970, "dur": 6.264, + "args": { + "External id": 974236,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345936585812.889, "dur": 88.571, + "args": { + "External id": 974237,"Record function id": 0, "Ev Idx": 13281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345936585903.007, "dur": 1402.828, + "args": { + "External id": 974238,"Record function id": 0, "Ev Idx": 13282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936585952.134, "dur": 1337.264, + "args": { + "External id": 974239,"Sequence number": 10552274, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13283 + } + }, + { + "ph": "s", "id": 192, "pid": 2338710, "tid": 2338710, "ts": 6345936585952.134, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936586090.413, "dur": 64.342, + "args": { + "External id": 974240,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936586177.148, "dur": 119.071, + "args": { + "External id": 974241,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936586311.054, "dur": 43.034, + "args": { + "External id": 974242,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936586367.099, "dur": 35.079, + "args": { + "External id": 974243,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936586435.856, "dur": 32.169, + "args": { + "External id": 974244,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345936586493.244, "dur": 20.720, + "args": { + "External id": 974245,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345936586541.983, "dur": 161.494, + "args": { + "External id": 974246,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936586604.003, "dur": 15.900, + "args": { + "External id": 974247,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936586611.330, "dur": 7.572, + "args": { + "External id": 974248,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936586624.489, "dur": 5.107, + "args": { + "External id": 974249,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936586631.233, "dur": 1.689, + "args": { + "External id": 974250,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936586636.020, "dur": 5.731, + "args": { + "External id": 974251,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936586718.173, "dur": 58.574, + "args": { + "External id": 974252,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345936586815.667, "dur": 36.067, + "args": { + "External id": 974253,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936586865.583, "dur": 51.685, + "args": { + "External id": 974254,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936586927.235, "dur": 40.649, + "args": { + "External id": 974255,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936586996.814, "dur": 51.037, + "args": { + "External id": 974256,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936587097.447, "dur": 53.553, + "args": { + "External id": 974257,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936587176.271, "dur": 22.718, + "args": { + "External id": 974258,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13302 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.15)", "pid": 2338710, "tid": 2338710, + "ts": 6345936587383.293, "dur": 93.826, + "args": { + "External id": 974259,"Record function id": 0, "Ev Idx": 13303 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345936587567.028, "dur": 57.690, + "args": { + "External id": 974260,"Record function id": 0, "Ev Idx": 13304 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.16)", "pid": 2338710, "tid": 2338710, + "ts": 6345936587634.526, "dur": 31734.004, + "args": { + "External id": 974261,"Record function id": 0, "Ev Idx": 13305 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.16)", "pid": 2338710, "tid": 2338710, + "ts": 6345936587644.275, "dur": 1160.313, + "args": { + "External id": 974262,"Record function id": 0, "Ev Idx": 13306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936587737.936, "dur": 10.484, + "args": { + "External id": 974263,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936587764.552, "dur": 45.995, + "args": { + "External id": 974264,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936587771.059, "dur": 2.705, + "args": { + "External id": 974265,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936587779.330, "dur": 0.597, + "args": { + "External id": 974266,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936587781.855, "dur": 0.638, + "args": { + "External id": 974267,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936587784.439, "dur": 0.645, + "args": { + "External id": 974268,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936587788.769, "dur": 0.673, + "args": { + "External id": 974269,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936587791.324, "dur": 0.811, + "args": { + "External id": 974270,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936587793.920, "dur": 4.325, + "args": { + "External id": 974271,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936587800.292, "dur": 0.630, + "args": { + "External id": 974272,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936587802.675, "dur": 0.670, + "args": { + "External id": 974273,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936587823.278, "dur": 64.567, + "args": { + "External id": 974274,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345936587929.425, "dur": 225.947, + "args": { + "External id": 974275,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936587943.695, "dur": 5.106, + "args": { + "External id": 974276,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345936587957.174, "dur": 16.310, + "args": { + "External id": 974277,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345936587966.087, "dur": 6.878, + "args": { + "External id": 974278,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936587970.914, "dur": 0.616, + "args": { + "External id": 974279,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936587981.736, "dur": 62.674, + "args": { + "External id": 974280,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936587984.557, "dur": 2.998, + "args": { + "External id": 974281,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936587989.855, "dur": 0.843, + "args": { + "External id": 974282,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936587992.533, "dur": 0.680, + "args": { + "External id": 974283,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936587997.629, "dur": 3.524, + "args": { + "External id": 974284,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936588002.538, "dur": 0.596, + "args": { + "External id": 974285,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936588004.860, "dur": 0.594, + "args": { + "External id": 974286,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936588029.971, "dur": 0.605, + "args": { + "External id": 974287,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936588034.194, "dur": 0.544, + "args": { + "External id": 974288,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936588036.325, "dur": 2.490, + "args": { + "External id": 974289,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936588098.959, "dur": 45.089, + "args": { + "External id": 974290,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345936588225.595, "dur": 468.949, + "args": { + "External id": 974291,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936588263.235, "dur": 425.190, + "args": { + "External id": 974292,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13336, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345936588275.634, "dur": 406.455, + "args": { + "External id": 974293,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936588722.069, "dur": 2.427, + "args": { + "External id": 974294,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13338, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.16)", "pid": 2338710, "tid": 2338710, + "ts": 6345936588827.870, "dur": 30306.364, + "args": { + "External id": 974295,"Record function id": 0, "Ev Idx": 13339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936588941.808, "dur": 8.223, + "args": { + "External id": 974296,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936588953.663, "dur": 1.224, + "args": { + "External id": 974297,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936588957.037, "dur": 2.857, + "args": { + "External id": 974298,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936588961.748, "dur": 1.076, + "args": { + "External id": 974299,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936588964.515, "dur": 1.092, + "args": { + "External id": 974300,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936588966.974, "dur": 0.933, + "args": { + "External id": 974301,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936588972.509, "dur": 0.953, + "args": { + "External id": 974302,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936588975.345, "dur": 2.558, + "args": { + "External id": 974303,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936588979.762, "dur": 1.232, + "args": { + "External id": 974304,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936588982.670, "dur": 1.009, + "args": { + "External id": 974305,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936589007.287, "dur": 30040.745, + "args": { + "External id": 974306,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936589051.229, "dur": 29985.847, + "args": { + "External id": 974307,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936589114.652, "dur": 22.260, + "args": { + "External id": 974308,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345936589142.598, "dur": 29837.274, + "args": { + "External id": 974309,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936589146.026, "dur": 29833.246, + "args": { + "External id": 974310,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936589153.299, "dur": 6.523, + "args": { + "External id": 974311,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936589162.136, "dur": 29811.751, + "args": { + "External id": 974312,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936619301.903, "dur": 38.222, + "args": { + "External id": 974313,"Sequence number": 10552275, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13357 + } + }, + { + "ph": "s", "id": 191, "pid": 2338710, "tid": 2338710, "ts": 6345936619301.903, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345936619323.669, "dur": 10.726, + "args": { + "External id": 974314,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936619328.523, "dur": 5.611, + "args": { + "External id": 974315,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345936619417.052, "dur": 87.638, + "args": { + "External id": 974316,"Record function id": 0, "Ev Idx": 13360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345936619506.315, "dur": 1364.683, + "args": { + "External id": 974317,"Record function id": 0, "Ev Idx": 13361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936619552.806, "dur": 1301.940, + "args": { + "External id": 974318,"Sequence number": 10552276, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13362 + } + }, + { + "ph": "s", "id": 190, "pid": 2338710, "tid": 2338710, "ts": 6345936619552.806, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936619636.206, "dur": 59.711, + "args": { + "External id": 974319,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936619711.761, "dur": 122.855, + "args": { + "External id": 974320,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936619850.706, "dur": 45.614, + "args": { + "External id": 974321,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936619908.492, "dur": 34.684, + "args": { + "External id": 974322,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936619981.014, "dur": 53.900, + "args": { + "External id": 974323,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345936620101.850, "dur": 26.702, + "args": { + "External id": 974324,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345936620157.316, "dur": 158.541, + "args": { + "External id": 974325,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936620220.931, "dur": 15.391, + "args": { + "External id": 974326,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936620228.843, "dur": 6.362, + "args": { + "External id": 974327,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936620239.906, "dur": 5.109, + "args": { + "External id": 974328,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936620246.616, "dur": 1.682, + "args": { + "External id": 974329,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936620251.409, "dur": 6.591, + "args": { + "External id": 974330,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936620328.599, "dur": 67.665, + "args": { + "External id": 974331,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345936620433.887, "dur": 34.664, + "args": { + "External id": 974332,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936620480.212, "dur": 51.248, + "args": { + "External id": 974333,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936620540.261, "dur": 41.621, + "args": { + "External id": 974334,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936620607.983, "dur": 31.817, + "args": { + "External id": 974335,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936620648.154, "dur": 47.261, + "args": { + "External id": 974336,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936620748.519, "dur": 26.795, + "args": { + "External id": 974337,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13381 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.16)", "pid": 2338710, "tid": 2338710, + "ts": 6345936620947.788, "dur": 154.795, + "args": { + "External id": 974338,"Record function id": 0, "Ev Idx": 13382 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345936621197.101, "dur": 59.948, + "args": { + "External id": 974339,"Record function id": 0, "Ev Idx": 13383 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.17)", "pid": 2338710, "tid": 2338710, + "ts": 6345936621268.947, "dur": 30663.992, + "args": { + "External id": 974340,"Record function id": 0, "Ev Idx": 13384 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.17)", "pid": 2338710, "tid": 2338710, + "ts": 6345936621277.783, "dur": 1109.495, + "args": { + "External id": 974341,"Record function id": 0, "Ev Idx": 13385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936621374.369, "dur": 12.181, + "args": { + "External id": 974342,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936621402.017, "dur": 45.416, + "args": { + "External id": 974343,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936621408.916, "dur": 2.892, + "args": { + "External id": 974344,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936621416.953, "dur": 0.573, + "args": { + "External id": 974345,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936621419.352, "dur": 0.471, + "args": { + "External id": 974346,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936621421.484, "dur": 0.416, + "args": { + "External id": 974347,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936621425.215, "dur": 0.507, + "args": { + "External id": 974348,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936621427.476, "dur": 0.648, + "args": { + "External id": 974349,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936621429.815, "dur": 4.934, + "args": { + "External id": 974350,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936621436.697, "dur": 0.612, + "args": { + "External id": 974351,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936621439.144, "dur": 0.658, + "args": { + "External id": 974352,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936621460.269, "dur": 66.255, + "args": { + "External id": 974353,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345936621570.168, "dur": 143.975, + "args": { + "External id": 974354,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936621583.765, "dur": 4.968, + "args": { + "External id": 974355,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345936621594.916, "dur": 13.024, + "args": { + "External id": 974356,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345936621600.283, "dur": 7.132, + "args": { + "External id": 974357,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936621605.185, "dur": 0.706, + "args": { + "External id": 974358,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936621615.970, "dur": 36.347, + "args": { + "External id": 974359,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936621618.739, "dur": 2.516, + "args": { + "External id": 974360,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936621623.315, "dur": 0.621, + "args": { + "External id": 974361,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936621625.680, "dur": 0.347, + "args": { + "External id": 974362,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936621629.913, "dur": 2.669, + "args": { + "External id": 974363,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936621634.457, "dur": 0.298, + "args": { + "External id": 974364,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936621636.682, "dur": 0.315, + "args": { + "External id": 974365,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936621640.431, "dur": 0.343, + "args": { + "External id": 974366,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936621642.626, "dur": 0.637, + "args": { + "External id": 974367,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936621645.137, "dur": 2.260, + "args": { + "External id": 974368,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936621667.071, "dur": 38.031, + "args": { + "External id": 974369,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345936621778.848, "dur": 485.565, + "args": { + "External id": 974370,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936621815.222, "dur": 442.707, + "args": { + "External id": 974371,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13415, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345936621827.534, "dur": 422.346, + "args": { + "External id": 974372,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936622298.330, "dur": 3.156, + "args": { + "External id": 974373,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13417, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.17)", "pid": 2338710, "tid": 2338710, + "ts": 6345936622411.691, "dur": 29270.299, + "args": { + "External id": 974374,"Record function id": 0, "Ev Idx": 13418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936622532.049, "dur": 7.777, + "args": { + "External id": 974375,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936622544.239, "dur": 1.222, + "args": { + "External id": 974376,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936622547.552, "dur": 3.501, + "args": { + "External id": 974377,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936622552.829, "dur": 1.250, + "args": { + "External id": 974378,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936622555.600, "dur": 1.073, + "args": { + "External id": 974379,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936622558.046, "dur": 0.999, + "args": { + "External id": 974380,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936622563.379, "dur": 0.883, + "args": { + "External id": 974381,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936622565.900, "dur": 2.657, + "args": { + "External id": 974382,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936622570.254, "dur": 1.140, + "args": { + "External id": 974383,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936622573.207, "dur": 0.646, + "args": { + "External id": 974384,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936622597.884, "dur": 29030.764, + "args": { + "External id": 974385,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936622617.954, "dur": 29000.333, + "args": { + "External id": 974386,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936622643.419, "dur": 19.126, + "args": { + "External id": 974387,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345936622666.836, "dur": 28907.769, + "args": { + "External id": 974388,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936622669.893, "dur": 28903.677, + "args": { + "External id": 974389,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936622676.595, "dur": 6.365, + "args": { + "External id": 974390,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936622684.798, "dur": 28884.336, + "args": { + "External id": 974391,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936651858.962, "dur": 41.575, + "args": { + "External id": 974392,"Sequence number": 10552277, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13436 + } + }, + { + "ph": "s", "id": 189, "pid": 2338710, "tid": 2338710, "ts": 6345936651858.962, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345936651881.448, "dur": 12.010, + "args": { + "External id": 974393,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936651887.338, "dur": 5.928, + "args": { + "External id": 974394,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345936651986.235, "dur": 131.939, + "args": { + "External id": 974395,"Record function id": 0, "Ev Idx": 13439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345936652122.028, "dur": 1372.329, + "args": { + "External id": 974396,"Record function id": 0, "Ev Idx": 13440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936652176.734, "dur": 1300.013, + "args": { + "External id": 974397,"Sequence number": 10552278, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13441 + } + }, + { + "ph": "s", "id": 188, "pid": 2338710, "tid": 2338710, "ts": 6345936652176.734, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936652266.325, "dur": 62.523, + "args": { + "External id": 974398,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936652346.157, "dur": 121.475, + "args": { + "External id": 974399,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936652483.666, "dur": 44.512, + "args": { + "External id": 974400,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936652541.138, "dur": 35.929, + "args": { + "External id": 974401,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936652607.851, "dur": 31.612, + "args": { + "External id": 974402,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345936652664.129, "dur": 22.514, + "args": { + "External id": 974403,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345936652715.464, "dur": 157.348, + "args": { + "External id": 974404,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936652774.632, "dur": 15.899, + "args": { + "External id": 974405,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936652782.252, "dur": 7.204, + "args": { + "External id": 974406,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936652795.065, "dur": 4.905, + "args": { + "External id": 974407,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936652801.688, "dur": 1.408, + "args": { + "External id": 974408,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936652805.955, "dur": 5.210, + "args": { + "External id": 974409,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936652885.005, "dur": 58.679, + "args": { + "External id": 974410,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345936652982.421, "dur": 56.938, + "args": { + "External id": 974411,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936653099.448, "dur": 62.109, + "args": { + "External id": 974412,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936653174.815, "dur": 41.931, + "args": { + "External id": 974413,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936653247.398, "dur": 32.587, + "args": { + "External id": 974414,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936653288.758, "dur": 53.924, + "args": { + "External id": 974415,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936653364.254, "dur": 22.723, + "args": { + "External id": 974416,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13460 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.17)", "pid": 2338710, "tid": 2338710, + "ts": 6345936653571.956, "dur": 93.432, + "args": { + "External id": 974417,"Record function id": 0, "Ev Idx": 13461 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345936653756.361, "dur": 58.015, + "args": { + "External id": 974418,"Record function id": 0, "Ev Idx": 13462 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.18)", "pid": 2338710, "tid": 2338710, + "ts": 6345936653824.775, "dur": 31454.576, + "args": { + "External id": 974419,"Record function id": 0, "Ev Idx": 13463 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.18)", "pid": 2338710, "tid": 2338710, + "ts": 6345936653834.052, "dur": 1106.726, + "args": { + "External id": 974420,"Record function id": 0, "Ev Idx": 13464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936653930.198, "dur": 10.704, + "args": { + "External id": 974421,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936653957.551, "dur": 44.699, + "args": { + "External id": 974422,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936653964.232, "dur": 2.297, + "args": { + "External id": 974423,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936653971.325, "dur": 0.769, + "args": { + "External id": 974424,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936653974.103, "dur": 0.558, + "args": { + "External id": 974425,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936653976.761, "dur": 0.928, + "args": { + "External id": 974426,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936653981.137, "dur": 0.490, + "args": { + "External id": 974427,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936653983.402, "dur": 0.865, + "args": { + "External id": 974428,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936653986.049, "dur": 4.071, + "args": { + "External id": 974429,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936653991.940, "dur": 0.676, + "args": { + "External id": 974430,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936653994.105, "dur": 0.515, + "args": { + "External id": 974431,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936654035.691, "dur": 107.052, + "args": { + "External id": 974432,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345936654189.747, "dur": 156.053, + "args": { + "External id": 974433,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936654205.595, "dur": 7.300, + "args": { + "External id": 974434,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345936654219.285, "dur": 13.471, + "args": { + "External id": 974435,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345936654224.813, "dur": 7.450, + "args": { + "External id": 974436,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936654229.613, "dur": 0.626, + "args": { + "External id": 974437,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936654241.940, "dur": 40.183, + "args": { + "External id": 974438,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936654244.936, "dur": 2.920, + "args": { + "External id": 974439,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936654249.805, "dur": 0.635, + "args": { + "External id": 974440,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936654252.425, "dur": 0.591, + "args": { + "External id": 974441,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936654260.785, "dur": 2.472, + "args": { + "External id": 974442,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936654264.770, "dur": 0.313, + "args": { + "External id": 974443,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936654266.896, "dur": 0.422, + "args": { + "External id": 974444,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936654270.674, "dur": 0.317, + "args": { + "External id": 974445,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936654272.430, "dur": 0.523, + "args": { + "External id": 974446,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936654275.108, "dur": 2.409, + "args": { + "External id": 974447,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936654295.876, "dur": 40.171, + "args": { + "External id": 974448,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345936654412.337, "dur": 419.522, + "args": { + "External id": 974449,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936654449.225, "dur": 376.699, + "args": { + "External id": 974450,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13494, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345936654461.633, "dur": 357.413, + "args": { + "External id": 974451,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936654860.402, "dur": 2.827, + "args": { + "External id": 974452,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13496, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.18)", "pid": 2338710, "tid": 2338710, + "ts": 6345936654965.792, "dur": 29997.047, + "args": { + "External id": 974453,"Record function id": 0, "Ev Idx": 13497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936655154.435, "dur": 8.115, + "args": { + "External id": 974454,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936655167.100, "dur": 1.340, + "args": { + "External id": 974455,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936655170.555, "dur": 4.208, + "args": { + "External id": 974456,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936655176.686, "dur": 0.786, + "args": { + "External id": 974457,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936655179.307, "dur": 1.123, + "args": { + "External id": 974458,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936655182.054, "dur": 0.916, + "args": { + "External id": 974459,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936655184.652, "dur": 1.435, + "args": { + "External id": 974460,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936655187.992, "dur": 2.226, + "args": { + "External id": 974461,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936655192.030, "dur": 0.959, + "args": { + "External id": 974462,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936655197.055, "dur": 1.160, + "args": { + "External id": 974463,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936655221.456, "dur": 29683.800, + "args": { + "External id": 974464,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936655241.061, "dur": 29653.862, + "args": { + "External id": 974465,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936655264.541, "dur": 19.704, + "args": { + "External id": 974466,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345936655288.384, "dur": 29558.674, + "args": { + "External id": 974467,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936655291.711, "dur": 29554.748, + "args": { + "External id": 974468,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936655298.586, "dur": 6.819, + "args": { + "External id": 974469,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936655307.309, "dur": 29534.314, + "args": { + "External id": 974470,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936685197.532, "dur": 45.139, + "args": { + "External id": 974471,"Sequence number": 10552279, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13515 + } + }, + { + "ph": "s", "id": 187, "pid": 2338710, "tid": 2338710, "ts": 6345936685197.532, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345936685222.640, "dur": 12.462, + "args": { + "External id": 974472,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936685228.007, "dur": 6.693, + "args": { + "External id": 974473,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345936685338.317, "dur": 90.548, + "args": { + "External id": 974474,"Record function id": 0, "Ev Idx": 13518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345936685431.918, "dur": 1364.951, + "args": { + "External id": 974475,"Record function id": 0, "Ev Idx": 13519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936685477.401, "dur": 1302.710, + "args": { + "External id": 974476,"Sequence number": 10552280, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13520 + } + }, + { + "ph": "s", "id": 186, "pid": 2338710, "tid": 2338710, "ts": 6345936685477.401, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936685572.884, "dur": 61.184, + "args": { + "External id": 974477,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936685652.443, "dur": 121.781, + "args": { + "External id": 974478,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936685790.099, "dur": 46.739, + "args": { + "External id": 974479,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936685844.660, "dur": 35.240, + "args": { + "External id": 974480,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936685915.826, "dur": 31.607, + "args": { + "External id": 974481,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345936685968.519, "dur": 22.554, + "args": { + "External id": 974482,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345936686033.824, "dur": 217.115, + "args": { + "External id": 974483,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936686140.584, "dur": 18.524, + "args": { + "External id": 974484,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936686148.847, "dur": 8.761, + "args": { + "External id": 974485,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936686163.466, "dur": 5.241, + "args": { + "External id": 974486,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936686170.243, "dur": 2.972, + "args": { + "External id": 974487,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936686176.223, "dur": 6.162, + "args": { + "External id": 974488,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936686266.623, "dur": 70.644, + "args": { + "External id": 974489,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345936686379.103, "dur": 38.625, + "args": { + "External id": 974490,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936686431.269, "dur": 54.279, + "args": { + "External id": 974491,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936686494.915, "dur": 41.096, + "args": { + "External id": 974492,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936686565.782, "dur": 32.162, + "args": { + "External id": 974493,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936686604.775, "dur": 43.603, + "args": { + "External id": 974494,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936686672.924, "dur": 26.898, + "args": { + "External id": 974495,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13539 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.18)", "pid": 2338710, "tid": 2338710, + "ts": 6345936686872.543, "dur": 91.373, + "args": { + "External id": 974496,"Record function id": 0, "Ev Idx": 13540 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345936687119.223, "dur": 60.654, + "args": { + "External id": 974497,"Record function id": 0, "Ev Idx": 13541 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.19)", "pid": 2338710, "tid": 2338710, + "ts": 6345936687191.270, "dur": 31628.960, + "args": { + "External id": 974498,"Record function id": 0, "Ev Idx": 13542 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.19)", "pid": 2338710, "tid": 2338710, + "ts": 6345936687201.693, "dur": 1107.256, + "args": { + "External id": 974499,"Record function id": 0, "Ev Idx": 13543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936687294.372, "dur": 12.805, + "args": { + "External id": 974500,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936687324.073, "dur": 43.969, + "args": { + "External id": 974501,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936687330.742, "dur": 2.429, + "args": { + "External id": 974502,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936687338.197, "dur": 0.464, + "args": { + "External id": 974503,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936687340.524, "dur": 0.466, + "args": { + "External id": 974504,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936687342.534, "dur": 0.643, + "args": { + "External id": 974505,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936687346.917, "dur": 0.987, + "args": { + "External id": 974506,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936687349.499, "dur": 0.659, + "args": { + "External id": 974507,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936687351.618, "dur": 4.925, + "args": { + "External id": 974508,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936687358.421, "dur": 0.472, + "args": { + "External id": 974509,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936687360.481, "dur": 0.376, + "args": { + "External id": 974510,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936687380.865, "dur": 68.126, + "args": { + "External id": 974511,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345936687489.398, "dur": 147.389, + "args": { + "External id": 974512,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936687504.044, "dur": 5.274, + "args": { + "External id": 974513,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345936687515.525, "dur": 12.635, + "args": { + "External id": 974514,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345936687520.950, "dur": 6.712, + "args": { + "External id": 974515,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936687525.677, "dur": 0.507, + "args": { + "External id": 974516,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936687537.231, "dur": 35.460, + "args": { + "External id": 974517,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936687539.991, "dur": 2.908, + "args": { + "External id": 974518,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936687544.953, "dur": 0.656, + "args": { + "External id": 974519,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936687547.113, "dur": 0.880, + "args": { + "External id": 974520,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936687551.438, "dur": 3.142, + "args": { + "External id": 974521,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936687556.160, "dur": 0.689, + "args": { + "External id": 974522,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936687558.577, "dur": 0.750, + "args": { + "External id": 974523,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936687562.650, "dur": 0.358, + "args": { + "External id": 974524,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936687564.576, "dur": 0.623, + "args": { + "External id": 974525,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936687566.750, "dur": 2.101, + "args": { + "External id": 974526,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936687587.667, "dur": 39.797, + "args": { + "External id": 974527,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345936687699.739, "dur": 486.193, + "args": { + "External id": 974528,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936687735.297, "dur": 443.692, + "args": { + "External id": 974529,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13573, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345936687747.640, "dur": 423.993, + "args": { + "External id": 974530,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936688220.061, "dur": 2.839, + "args": { + "External id": 974531,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13575, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.19)", "pid": 2338710, "tid": 2338710, + "ts": 6345936688334.558, "dur": 30221.672, + "args": { + "External id": 974532,"Record function id": 0, "Ev Idx": 13576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936688454.995, "dur": 7.902, + "args": { + "External id": 974533,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936688467.078, "dur": 1.122, + "args": { + "External id": 974534,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936688470.429, "dur": 3.840, + "args": { + "External id": 974535,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936688476.183, "dur": 1.124, + "args": { + "External id": 974536,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936688478.914, "dur": 1.046, + "args": { + "External id": 974537,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936688481.542, "dur": 1.234, + "args": { + "External id": 974538,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936688487.208, "dur": 0.920, + "args": { + "External id": 974539,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936688490.081, "dur": 2.779, + "args": { + "External id": 974540,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936688494.785, "dur": 0.866, + "args": { + "External id": 974541,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936688497.537, "dur": 0.923, + "args": { + "External id": 974542,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936688521.320, "dur": 29978.171, + "args": { + "External id": 974543,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936688540.300, "dur": 29948.535, + "args": { + "External id": 974544,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936688566.229, "dur": 21.459, + "args": { + "External id": 974545,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345936688591.896, "dur": 29851.822, + "args": { + "External id": 974546,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936688598.948, "dur": 29843.462, + "args": { + "External id": 974547,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936688605.693, "dur": 6.568, + "args": { + "External id": 974548,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936688614.089, "dur": 29824.235, + "args": { + "External id": 974549,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936718737.728, "dur": 46.767, + "args": { + "External id": 974550,"Sequence number": 10552281, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13594 + } + }, + { + "ph": "s", "id": 185, "pid": 2338710, "tid": 2338710, "ts": 6345936718737.728, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345936718764.662, "dur": 12.462, + "args": { + "External id": 974551,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936718770.357, "dur": 6.488, + "args": { + "External id": 974552,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345936718873.223, "dur": 89.536, + "args": { + "External id": 974553,"Record function id": 0, "Ev Idx": 13597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345936718964.847, "dur": 1456.447, + "args": { + "External id": 974554,"Record function id": 0, "Ev Idx": 13598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936719033.422, "dur": 1369.317, + "args": { + "External id": 974555,"Sequence number": 10552282, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13599 + } + }, + { + "ph": "s", "id": 184, "pid": 2338710, "tid": 2338710, "ts": 6345936719033.422, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936719160.857, "dur": 66.094, + "args": { + "External id": 974556,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936719246.490, "dur": 122.587, + "args": { + "External id": 974557,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936719386.677, "dur": 51.515, + "args": { + "External id": 974558,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936719451.281, "dur": 35.607, + "args": { + "External id": 974559,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936719519.132, "dur": 33.212, + "args": { + "External id": 974560,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345936719577.531, "dur": 23.961, + "args": { + "External id": 974561,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345936719629.558, "dur": 163.145, + "args": { + "External id": 974562,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936719690.616, "dur": 16.738, + "args": { + "External id": 974563,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936719698.462, "dur": 7.828, + "args": { + "External id": 974564,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936719711.770, "dur": 5.157, + "args": { + "External id": 974565,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936719718.437, "dur": 1.676, + "args": { + "External id": 974566,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936719723.569, "dur": 6.183, + "args": { + "External id": 974567,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936719806.640, "dur": 59.397, + "args": { + "External id": 974568,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345936719904.055, "dur": 36.052, + "args": { + "External id": 974569,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936719951.595, "dur": 53.289, + "args": { + "External id": 974570,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936720039.032, "dur": 88.124, + "args": { + "External id": 974571,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936720164.955, "dur": 38.852, + "args": { + "External id": 974572,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936720213.654, "dur": 48.975, + "args": { + "External id": 974573,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936720285.880, "dur": 24.125, + "args": { + "External id": 974574,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13618 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.19)", "pid": 2338710, "tid": 2338710, + "ts": 6345936720502.776, "dur": 93.632, + "args": { + "External id": 974575,"Record function id": 0, "Ev Idx": 13619 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345936720690.241, "dur": 57.939, + "args": { + "External id": 974576,"Record function id": 0, "Ev Idx": 13620 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.20)", "pid": 2338710, "tid": 2338710, + "ts": 6345936720759.237, "dur": 31788.993, + "args": { + "External id": 974577,"Record function id": 0, "Ev Idx": 13621 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.20)", "pid": 2338710, "tid": 2338710, + "ts": 6345936720769.024, "dur": 1127.743, + "args": { + "External id": 974578,"Record function id": 0, "Ev Idx": 13622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936720866.053, "dur": 11.433, + "args": { + "External id": 974579,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936720894.277, "dur": 43.095, + "args": { + "External id": 974580,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936720901.014, "dur": 2.812, + "args": { + "External id": 974581,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936720907.926, "dur": 0.497, + "args": { + "External id": 974582,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936720910.191, "dur": 0.664, + "args": { + "External id": 974583,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936720912.369, "dur": 0.800, + "args": { + "External id": 974584,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936720916.435, "dur": 0.697, + "args": { + "External id": 974585,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936720918.872, "dur": 0.669, + "args": { + "External id": 974586,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936720921.132, "dur": 4.574, + "args": { + "External id": 974587,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936720927.728, "dur": 0.704, + "args": { + "External id": 974588,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936720930.270, "dur": 0.707, + "args": { + "External id": 974589,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936720950.160, "dur": 92.242, + "args": { + "External id": 974590,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345936721129.077, "dur": 161.884, + "args": { + "External id": 974591,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936721146.055, "dur": 7.407, + "args": { + "External id": 974592,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345936721159.908, "dur": 14.239, + "args": { + "External id": 974593,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345936721165.257, "dur": 8.364, + "args": { + "External id": 974594,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936721170.521, "dur": 0.884, + "args": { + "External id": 974595,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936721183.119, "dur": 34.948, + "args": { + "External id": 974596,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936721186.267, "dur": 2.869, + "args": { + "External id": 974597,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936721190.762, "dur": 0.417, + "args": { + "External id": 974598,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936721192.826, "dur": 0.551, + "args": { + "External id": 974599,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936721197.041, "dur": 2.744, + "args": { + "External id": 974600,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936721201.444, "dur": 0.618, + "args": { + "External id": 974601,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936721203.708, "dur": 0.531, + "args": { + "External id": 974602,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936721207.560, "dur": 0.372, + "args": { + "External id": 974603,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936721209.497, "dur": 0.275, + "args": { + "External id": 974604,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936721211.460, "dur": 1.897, + "args": { + "External id": 974605,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936721237.371, "dur": 44.115, + "args": { + "External id": 974606,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345936721359.175, "dur": 426.143, + "args": { + "External id": 974607,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936721395.750, "dur": 383.889, + "args": { + "External id": 974608,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13652, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345936721409.605, "dur": 361.426, + "args": { + "External id": 974609,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936721814.816, "dur": 2.650, + "args": { + "External id": 974610,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13654, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.20)", "pid": 2338710, "tid": 2338710, + "ts": 6345936721920.770, "dur": 30371.544, + "args": { + "External id": 974611,"Record function id": 0, "Ev Idx": 13655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936722098.860, "dur": 8.888, + "args": { + "External id": 974612,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936722113.262, "dur": 1.217, + "args": { + "External id": 974613,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936722116.759, "dur": 3.317, + "args": { + "External id": 974614,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936722122.006, "dur": 1.091, + "args": { + "External id": 974615,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936722124.940, "dur": 1.149, + "args": { + "External id": 974616,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936722127.445, "dur": 0.901, + "args": { + "External id": 974617,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936722130.663, "dur": 1.374, + "args": { + "External id": 974618,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936722133.970, "dur": 2.336, + "args": { + "External id": 974619,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936722138.133, "dur": 1.016, + "args": { + "External id": 974620,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936722143.176, "dur": 0.916, + "args": { + "External id": 974621,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936722167.904, "dur": 30066.447, + "args": { + "External id": 974622,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936722187.917, "dur": 30035.265, + "args": { + "External id": 974623,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936722210.703, "dur": 21.210, + "args": { + "External id": 974624,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345936722237.487, "dur": 29936.689, + "args": { + "External id": 974625,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936722240.667, "dur": 29932.422, + "args": { + "External id": 974626,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936722247.890, "dur": 6.547, + "args": { + "External id": 974627,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936722256.441, "dur": 29912.413, + "args": { + "External id": 974628,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936752473.312, "dur": 45.946, + "args": { + "External id": 974629,"Sequence number": 10552283, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13673 + } + }, + { + "ph": "s", "id": 183, "pid": 2338710, "tid": 2338710, "ts": 6345936752473.312, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345936752495.330, "dur": 18.224, + "args": { + "External id": 974630,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936752505.780, "dur": 7.538, + "args": { + "External id": 974631,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345936752594.919, "dur": 87.565, + "args": { + "External id": 974632,"Record function id": 0, "Ev Idx": 13676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345936752684.001, "dur": 1355.583, + "args": { + "External id": 974633,"Record function id": 0, "Ev Idx": 13677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936752728.753, "dur": 1272.295, + "args": { + "External id": 974634,"Sequence number": 10552284, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13678 + } + }, + { + "ph": "s", "id": 182, "pid": 2338710, "tid": 2338710, "ts": 6345936752728.753, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936752819.039, "dur": 58.785, + "args": { + "External id": 974635,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936752894.921, "dur": 135.990, + "args": { + "External id": 974636,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936753091.996, "dur": 61.418, + "args": { + "External id": 974637,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936753166.165, "dur": 37.479, + "args": { + "External id": 974638,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936753240.762, "dur": 35.408, + "args": { + "External id": 974639,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345936753301.835, "dur": 21.983, + "args": { + "External id": 974640,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345936753353.206, "dur": 162.987, + "args": { + "External id": 974641,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936753414.006, "dur": 16.876, + "args": { + "External id": 974642,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936753422.158, "dur": 7.733, + "args": { + "External id": 974643,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936753435.186, "dur": 5.290, + "args": { + "External id": 974644,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936753442.343, "dur": 1.426, + "args": { + "External id": 974645,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936753448.578, "dur": 6.188, + "args": { + "External id": 974646,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936753528.419, "dur": 62.366, + "args": { + "External id": 974647,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345936753626.473, "dur": 34.625, + "args": { + "External id": 974648,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936753672.706, "dur": 49.101, + "args": { + "External id": 974649,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936753730.513, "dur": 39.352, + "args": { + "External id": 974650,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936753795.252, "dur": 30.586, + "args": { + "External id": 974651,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936753832.244, "dur": 41.627, + "args": { + "External id": 974652,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936753895.834, "dur": 22.709, + "args": { + "External id": 974653,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13697 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.20)", "pid": 2338710, "tid": 2338710, + "ts": 6345936754160.834, "dur": 93.716, + "args": { + "External id": 974654,"Record function id": 0, "Ev Idx": 13698 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345936754346.450, "dur": 57.588, + "args": { + "External id": 974655,"Record function id": 0, "Ev Idx": 13699 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.21)", "pid": 2338710, "tid": 2338710, + "ts": 6345936754414.673, "dur": 32562.396, + "args": { + "External id": 974656,"Record function id": 0, "Ev Idx": 13700 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.21)", "pid": 2338710, "tid": 2338710, + "ts": 6345936754423.773, "dur": 1183.170, + "args": { + "External id": 974657,"Record function id": 0, "Ev Idx": 13701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936754524.303, "dur": 13.009, + "args": { + "External id": 974658,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936754553.107, "dur": 45.016, + "args": { + "External id": 974659,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936754560.220, "dur": 3.040, + "args": { + "External id": 974660,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936754568.015, "dur": 0.618, + "args": { + "External id": 974661,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936754570.246, "dur": 0.686, + "args": { + "External id": 974662,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936754572.656, "dur": 0.599, + "args": { + "External id": 974663,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936754575.819, "dur": 0.856, + "args": { + "External id": 974664,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936754578.573, "dur": 0.725, + "args": { + "External id": 974665,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936754581.243, "dur": 5.319, + "args": { + "External id": 974666,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936754588.487, "dur": 0.752, + "args": { + "External id": 974667,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936754590.870, "dur": 0.710, + "args": { + "External id": 974668,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936754611.250, "dur": 64.941, + "args": { + "External id": 974669,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345936754716.237, "dur": 144.432, + "args": { + "External id": 974670,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936754728.997, "dur": 5.419, + "args": { + "External id": 974671,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345936754740.494, "dur": 12.417, + "args": { + "External id": 974672,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345936754745.789, "dur": 6.604, + "args": { + "External id": 974673,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936754750.437, "dur": 0.618, + "args": { + "External id": 974674,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936754761.810, "dur": 36.717, + "args": { + "External id": 974675,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936754764.650, "dur": 2.012, + "args": { + "External id": 974676,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936754768.646, "dur": 0.550, + "args": { + "External id": 974677,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936754770.996, "dur": 0.779, + "args": { + "External id": 974678,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936754776.023, "dur": 2.861, + "args": { + "External id": 974679,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936754780.607, "dur": 0.826, + "args": { + "External id": 974680,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936754783.446, "dur": 0.542, + "args": { + "External id": 974681,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936754787.609, "dur": 0.550, + "args": { + "External id": 974682,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936754790.006, "dur": 0.291, + "args": { + "External id": 974683,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936754791.985, "dur": 2.279, + "args": { + "External id": 974684,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936754813.427, "dur": 38.038, + "args": { + "External id": 974685,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345936754923.765, "dur": 557.512, + "args": { + "External id": 974686,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936754960.357, "dur": 513.614, + "args": { + "External id": 974687,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13731, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345936754983.455, "dur": 482.780, + "args": { + "External id": 974688,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936755514.620, "dur": 3.287, + "args": { + "External id": 974689,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13733, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.21)", "pid": 2338710, "tid": 2338710, + "ts": 6345936755631.647, "dur": 31085.722, + "args": { + "External id": 974690,"Record function id": 0, "Ev Idx": 13734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936755749.986, "dur": 7.683, + "args": { + "External id": 974691,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936755761.327, "dur": 1.216, + "args": { + "External id": 974692,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936755764.535, "dur": 3.569, + "args": { + "External id": 974693,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936755770.120, "dur": 1.146, + "args": { + "External id": 974694,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936755772.852, "dur": 1.034, + "args": { + "External id": 974695,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936755775.566, "dur": 1.072, + "args": { + "External id": 974696,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936755780.551, "dur": 0.768, + "args": { + "External id": 974697,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936755783.033, "dur": 2.501, + "args": { + "External id": 974698,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936755787.364, "dur": 1.029, + "args": { + "External id": 974699,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936755790.168, "dur": 0.901, + "args": { + "External id": 974700,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936755815.438, "dur": 30846.194, + "args": { + "External id": 974701,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936755835.235, "dur": 30815.426, + "args": { + "External id": 974702,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936755856.606, "dur": 21.854, + "args": { + "External id": 974703,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345936755883.113, "dur": 30723.306, + "args": { + "External id": 974704,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936755886.355, "dur": 30718.472, + "args": { + "External id": 974705,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936755892.727, "dur": 7.088, + "args": { + "External id": 974706,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936755901.680, "dur": 30698.516, + "args": { + "External id": 974707,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936786899.225, "dur": 45.484, + "args": { + "External id": 974708,"Sequence number": 10552285, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13752 + } + }, + { + "ph": "s", "id": 181, "pid": 2338710, "tid": 2338710, "ts": 6345936786899.225, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345936786925.421, "dur": 12.341, + "args": { + "External id": 974709,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936786931.105, "dur": 6.394, + "args": { + "External id": 974710,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345936787046.204, "dur": 118.864, + "args": { + "External id": 974711,"Record function id": 0, "Ev Idx": 13755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345936787168.543, "dur": 1367.922, + "args": { + "External id": 974712,"Record function id": 0, "Ev Idx": 13756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936787224.708, "dur": 1294.297, + "args": { + "External id": 974713,"Sequence number": 10552286, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13757 + } + }, + { + "ph": "s", "id": 180, "pid": 2338710, "tid": 2338710, "ts": 6345936787224.708, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936787318.931, "dur": 64.858, + "args": { + "External id": 974714,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936787402.906, "dur": 121.591, + "args": { + "External id": 974715,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936787539.843, "dur": 48.748, + "args": { + "External id": 974716,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936787600.851, "dur": 35.702, + "args": { + "External id": 974717,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936787669.909, "dur": 31.711, + "args": { + "External id": 974718,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345936787724.742, "dur": 19.816, + "args": { + "External id": 974719,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345936787770.885, "dur": 158.766, + "args": { + "External id": 974720,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936787831.089, "dur": 17.003, + "args": { + "External id": 974721,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936787839.340, "dur": 7.599, + "args": { + "External id": 974722,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936787852.638, "dur": 4.835, + "args": { + "External id": 974723,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936787859.088, "dur": 1.617, + "args": { + "External id": 974724,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936787863.801, "dur": 5.658, + "args": { + "External id": 974725,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936787943.221, "dur": 57.717, + "args": { + "External id": 974726,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345936788106.832, "dur": 39.012, + "args": { + "External id": 974727,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936788159.931, "dur": 58.067, + "args": { + "External id": 974728,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936788231.392, "dur": 41.820, + "args": { + "External id": 974729,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936788304.214, "dur": 29.111, + "args": { + "External id": 974730,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936788342.382, "dur": 42.678, + "args": { + "External id": 974731,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936788406.054, "dur": 21.053, + "args": { + "External id": 974732,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13776 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.21)", "pid": 2338710, "tid": 2338710, + "ts": 6345936788618.657, "dur": 85.873, + "args": { + "External id": 974733,"Record function id": 0, "Ev Idx": 13777 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345936788796.104, "dur": 54.130, + "args": { + "External id": 974734,"Record function id": 0, "Ev Idx": 13778 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.22)", "pid": 2338710, "tid": 2338710, + "ts": 6345936788860.995, "dur": 31688.657, + "args": { + "External id": 974735,"Record function id": 0, "Ev Idx": 13779 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.22)", "pid": 2338710, "tid": 2338710, + "ts": 6345936788870.313, "dur": 1107.764, + "args": { + "External id": 974736,"Record function id": 0, "Ev Idx": 13780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936788962.367, "dur": 10.773, + "args": { + "External id": 974737,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936788989.540, "dur": 102.995, + "args": { + "External id": 974738,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936788996.297, "dur": 2.921, + "args": { + "External id": 974739,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936789004.134, "dur": 0.507, + "args": { + "External id": 974740,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936789006.617, "dur": 0.664, + "args": { + "External id": 974741,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936789028.384, "dur": 0.724, + "args": { + "External id": 974742,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936789033.975, "dur": 0.706, + "args": { + "External id": 974743,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936789036.727, "dur": 0.811, + "args": { + "External id": 974744,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936789039.628, "dur": 4.115, + "args": { + "External id": 974745,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936789045.425, "dur": 0.516, + "args": { + "External id": 974746,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936789047.691, "dur": 0.344, + "args": { + "External id": 974747,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936789109.061, "dur": 69.873, + "args": { + "External id": 974748,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345936789224.460, "dur": 146.975, + "args": { + "External id": 974749,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936789240.574, "dur": 6.594, + "args": { + "External id": 974750,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345936789253.600, "dur": 13.312, + "args": { + "External id": 974751,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345936789259.024, "dur": 7.391, + "args": { + "External id": 974752,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936789264.100, "dur": 0.902, + "args": { + "External id": 974753,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936789275.956, "dur": 35.578, + "args": { + "External id": 974754,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936789279.070, "dur": 2.049, + "args": { + "External id": 974755,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936789283.229, "dur": 0.604, + "args": { + "External id": 974756,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936789285.611, "dur": 0.529, + "args": { + "External id": 974757,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936789290.112, "dur": 2.773, + "args": { + "External id": 974758,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936789294.650, "dur": 0.324, + "args": { + "External id": 974759,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936789296.703, "dur": 0.475, + "args": { + "External id": 974760,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936789300.029, "dur": 0.372, + "args": { + "External id": 974761,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936789302.144, "dur": 0.628, + "args": { + "External id": 974762,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936789304.497, "dur": 2.398, + "args": { + "External id": 974763,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936789325.335, "dur": 36.660, + "args": { + "External id": 974764,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345936789439.870, "dur": 426.669, + "args": { + "External id": 974765,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936789484.883, "dur": 376.085, + "args": { + "External id": 974766,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13810, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345936789497.544, "dur": 356.831, + "args": { + "External id": 974767,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936789896.846, "dur": 2.683, + "args": { + "External id": 974768,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13812, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.22)", "pid": 2338710, "tid": 2338710, + "ts": 6345936790002.546, "dur": 30286.075, + "args": { + "External id": 974769,"Record function id": 0, "Ev Idx": 13813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936790190.342, "dur": 7.678, + "args": { + "External id": 974770,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936790202.299, "dur": 1.175, + "args": { + "External id": 974771,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936790205.644, "dur": 3.710, + "args": { + "External id": 974772,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936790211.727, "dur": 1.006, + "args": { + "External id": 974773,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936790214.139, "dur": 1.018, + "args": { + "External id": 974774,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936790216.783, "dur": 0.816, + "args": { + "External id": 974775,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936790219.937, "dur": 0.789, + "args": { + "External id": 974776,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936790222.942, "dur": 2.173, + "args": { + "External id": 974777,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936790226.867, "dur": 0.659, + "args": { + "External id": 974778,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936790231.329, "dur": 0.669, + "args": { + "External id": 974779,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936790254.845, "dur": 29975.687, + "args": { + "External id": 974780,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936790280.638, "dur": 29939.099, + "args": { + "External id": 974781,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936790296.645, "dur": 19.927, + "args": { + "External id": 974782,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345936790320.691, "dur": 29856.231, + "args": { + "External id": 974783,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936790323.858, "dur": 29852.099, + "args": { + "External id": 974784,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936790330.919, "dur": 6.297, + "args": { + "External id": 974785,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936790338.854, "dur": 29832.503, + "args": { + "External id": 974786,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936820470.088, "dur": 45.087, + "args": { + "External id": 974787,"Sequence number": 10552287, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13831 + } + }, + { + "ph": "s", "id": 179, "pid": 2338710, "tid": 2338710, "ts": 6345936820470.088, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345936820493.431, "dur": 14.801, + "args": { + "External id": 974788,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936820502.634, "dur": 5.318, + "args": { + "External id": 974789,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345936820601.904, "dur": 87.827, + "args": { + "External id": 974790,"Record function id": 0, "Ev Idx": 13834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345936820691.379, "dur": 1352.759, + "args": { + "External id": 974791,"Record function id": 0, "Ev Idx": 13835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936820735.898, "dur": 1270.015, + "args": { + "External id": 974792,"Sequence number": 10552288, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13836 + } + }, + { + "ph": "s", "id": 178, "pid": 2338710, "tid": 2338710, "ts": 6345936820735.898, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936820820.966, "dur": 55.853, + "args": { + "External id": 974793,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936820893.004, "dur": 138.699, + "args": { + "External id": 974794,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936821089.322, "dur": 60.176, + "args": { + "External id": 974795,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936821164.677, "dur": 35.607, + "args": { + "External id": 974796,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936821238.789, "dur": 33.977, + "args": { + "External id": 974797,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345936821296.720, "dur": 20.239, + "args": { + "External id": 974798,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345936821343.356, "dur": 163.394, + "args": { + "External id": 974799,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936821406.053, "dur": 16.843, + "args": { + "External id": 974800,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936821414.266, "dur": 7.636, + "args": { + "External id": 974801,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936821427.129, "dur": 4.515, + "args": { + "External id": 974802,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936821433.149, "dur": 1.331, + "args": { + "External id": 974803,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936821437.314, "dur": 6.119, + "args": { + "External id": 974804,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936821518.545, "dur": 61.615, + "args": { + "External id": 974805,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345936821616.803, "dur": 34.308, + "args": { + "External id": 974806,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936821662.622, "dur": 48.895, + "args": { + "External id": 974807,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936821722.116, "dur": 40.701, + "args": { + "External id": 974808,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936821789.810, "dur": 34.028, + "args": { + "External id": 974809,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936821832.038, "dur": 42.638, + "args": { + "External id": 974810,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936821896.300, "dur": 22.062, + "args": { + "External id": 974811,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13855 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.22)", "pid": 2338710, "tid": 2338710, + "ts": 6345936822165.634, "dur": 89.211, + "args": { + "External id": 974812,"Record function id": 0, "Ev Idx": 13856 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345936822348.754, "dur": 56.851, + "args": { + "External id": 974813,"Record function id": 0, "Ev Idx": 13857 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.23)", "pid": 2338710, "tid": 2338710, + "ts": 6345936822416.331, "dur": 32512.229, + "args": { + "External id": 974814,"Record function id": 0, "Ev Idx": 13858 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.23)", "pid": 2338710, "tid": 2338710, + "ts": 6345936822426.562, "dur": 1102.397, + "args": { + "External id": 974815,"Record function id": 0, "Ev Idx": 13859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936822524.710, "dur": 12.728, + "args": { + "External id": 974816,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936822555.370, "dur": 43.727, + "args": { + "External id": 974817,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936822562.030, "dur": 2.826, + "args": { + "External id": 974818,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936822569.429, "dur": 0.476, + "args": { + "External id": 974819,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936822571.852, "dur": 0.539, + "args": { + "External id": 974820,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936822573.965, "dur": 0.501, + "args": { + "External id": 974821,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936822577.883, "dur": 0.693, + "args": { + "External id": 974822,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936822580.076, "dur": 0.708, + "args": { + "External id": 974823,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936822582.553, "dur": 4.854, + "args": { + "External id": 974824,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936822589.251, "dur": 0.686, + "args": { + "External id": 974825,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936822591.732, "dur": 0.503, + "args": { + "External id": 974826,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936822611.659, "dur": 66.053, + "args": { + "External id": 974827,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345936822717.258, "dur": 143.603, + "args": { + "External id": 974828,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936822730.976, "dur": 5.092, + "args": { + "External id": 974829,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345936822742.548, "dur": 12.702, + "args": { + "External id": 974830,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345936822748.000, "dur": 6.717, + "args": { + "External id": 974831,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936822752.465, "dur": 0.664, + "args": { + "External id": 974832,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936822762.831, "dur": 36.431, + "args": { + "External id": 974833,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936822766.099, "dur": 1.660, + "args": { + "External id": 974834,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936822769.768, "dur": 0.533, + "args": { + "External id": 974835,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936822772.075, "dur": 0.780, + "args": { + "External id": 974836,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936822776.793, "dur": 2.885, + "args": { + "External id": 974837,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936822781.211, "dur": 0.611, + "args": { + "External id": 974838,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936822783.478, "dur": 0.381, + "args": { + "External id": 974839,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936822787.591, "dur": 0.614, + "args": { + "External id": 974840,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936822789.907, "dur": 0.392, + "args": { + "External id": 974841,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936822792.123, "dur": 2.470, + "args": { + "External id": 974842,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936822813.664, "dur": 37.194, + "args": { + "External id": 974843,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345936822922.716, "dur": 488.504, + "args": { + "External id": 974844,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936822960.694, "dur": 444.308, + "args": { + "External id": 974845,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13889, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345936822972.307, "dur": 424.795, + "args": { + "External id": 974846,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936823443.536, "dur": 2.922, + "args": { + "External id": 974847,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13891, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.23)", "pid": 2338710, "tid": 2338710, + "ts": 6345936823553.651, "dur": 31147.639, + "args": { + "External id": 974848,"Record function id": 0, "Ev Idx": 13892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936823711.340, "dur": 7.775, + "args": { + "External id": 974849,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936823723.077, "dur": 1.155, + "args": { + "External id": 974850,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936823725.899, "dur": 3.317, + "args": { + "External id": 974851,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936823733.243, "dur": 0.806, + "args": { + "External id": 974852,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936823735.501, "dur": 0.773, + "args": { + "External id": 974853,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936823737.554, "dur": 0.778, + "args": { + "External id": 974854,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936823740.392, "dur": 0.749, + "args": { + "External id": 974855,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936823745.434, "dur": 2.301, + "args": { + "External id": 974856,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936823749.549, "dur": 0.996, + "args": { + "External id": 974857,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936823752.302, "dur": 0.872, + "args": { + "External id": 974858,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936823776.409, "dur": 30872.812, + "args": { + "External id": 974859,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936823795.331, "dur": 30843.277, + "args": { + "External id": 974860,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936823811.618, "dur": 19.293, + "args": { + "External id": 974861,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345936823837.475, "dur": 30758.456, + "args": { + "External id": 974862,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936823840.414, "dur": 30753.767, + "args": { + "External id": 974863,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936823847.172, "dur": 6.620, + "args": { + "External id": 974864,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936823855.493, "dur": 30734.929, + "args": { + "External id": 974865,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936854860.022, "dur": 41.055, + "args": { + "External id": 974866,"Sequence number": 10552289, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13910 + } + }, + { + "ph": "s", "id": 177, "pid": 2338710, "tid": 2338710, "ts": 6345936854860.022, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345936854882.941, "dur": 12.687, + "args": { + "External id": 974867,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936854888.904, "dur": 6.436, + "args": { + "External id": 974868,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345936854976.941, "dur": 139.074, + "args": { + "External id": 974869,"Record function id": 0, "Ev Idx": 13913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345936855119.220, "dur": 1337.925, + "args": { + "External id": 974870,"Record function id": 0, "Ev Idx": 13914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936855169.550, "dur": 1269.598, + "args": { + "External id": 974871,"Sequence number": 10552290, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13915 + } + }, + { + "ph": "s", "id": 176, "pid": 2338710, "tid": 2338710, "ts": 6345936855169.550, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936855259.175, "dur": 61.436, + "args": { + "External id": 974872,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936855337.257, "dur": 119.735, + "args": { + "External id": 974873,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936855471.880, "dur": 44.099, + "args": { + "External id": 974874,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936855525.590, "dur": 35.787, + "args": { + "External id": 974875,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936855590.755, "dur": 30.687, + "args": { + "External id": 974876,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345936855646.289, "dur": 21.150, + "args": { + "External id": 974877,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345936855693.562, "dur": 155.042, + "args": { + "External id": 974878,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936855752.206, "dur": 15.180, + "args": { + "External id": 974879,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936855759.615, "dur": 6.823, + "args": { + "External id": 974880,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936855770.456, "dur": 5.247, + "args": { + "External id": 974881,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936855777.248, "dur": 1.721, + "args": { + "External id": 974882,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936855781.749, "dur": 6.063, + "args": { + "External id": 974883,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936855860.896, "dur": 60.650, + "args": { + "External id": 974884,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345936855958.235, "dur": 37.208, + "args": { + "External id": 974885,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936856006.259, "dur": 120.199, + "args": { + "External id": 974886,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936856142.970, "dur": 46.402, + "args": { + "External id": 974887,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936856217.369, "dur": 35.074, + "args": { + "External id": 974888,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936856260.548, "dur": 43.704, + "args": { + "External id": 974889,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936856326.788, "dur": 22.808, + "args": { + "External id": 974890,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13934 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.23)", "pid": 2338710, "tid": 2338710, + "ts": 6345936856536.407, "dur": 92.592, + "args": { + "External id": 974891,"Record function id": 0, "Ev Idx": 13935 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345936856718.445, "dur": 56.326, + "args": { + "External id": 974892,"Record function id": 0, "Ev Idx": 13936 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.24)", "pid": 2338710, "tid": 2338710, + "ts": 6345936856785.422, "dur": 31311.930, + "args": { + "External id": 974893,"Record function id": 0, "Ev Idx": 13937 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.24)", "pid": 2338710, "tid": 2338710, + "ts": 6345936856795.907, "dur": 1093.591, + "args": { + "External id": 974894,"Record function id": 0, "Ev Idx": 13938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936856890.808, "dur": 10.998, + "args": { + "External id": 974895,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936856916.780, "dur": 41.363, + "args": { + "External id": 974896,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936856923.219, "dur": 2.508, + "args": { + "External id": 974897,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936856930.447, "dur": 0.560, + "args": { + "External id": 974898,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936856932.812, "dur": 0.660, + "args": { + "External id": 974899,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936856935.141, "dur": 0.828, + "args": { + "External id": 974900,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936856939.562, "dur": 0.434, + "args": { + "External id": 974901,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936856941.481, "dur": 0.549, + "args": { + "External id": 974902,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936856943.589, "dur": 3.397, + "args": { + "External id": 974903,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936856948.445, "dur": 0.330, + "args": { + "External id": 974904,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936856950.513, "dur": 0.531, + "args": { + "External id": 974905,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936856971.627, "dur": 122.815, + "args": { + "External id": 974906,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345936857140.553, "dur": 153.303, + "args": { + "External id": 974907,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936857155.402, "dur": 7.262, + "args": { + "External id": 974908,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345936857169.301, "dur": 13.516, + "args": { + "External id": 974909,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345936857174.680, "dur": 7.643, + "args": { + "External id": 974910,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936857179.676, "dur": 0.768, + "args": { + "External id": 974911,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936857191.837, "dur": 35.509, + "args": { + "External id": 974912,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936857195.196, "dur": 2.407, + "args": { + "External id": 974913,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936857199.782, "dur": 0.555, + "args": { + "External id": 974914,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936857201.953, "dur": 0.303, + "args": { + "External id": 974915,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936857205.932, "dur": 2.795, + "args": { + "External id": 974916,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936857210.346, "dur": 0.359, + "args": { + "External id": 974917,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936857212.195, "dur": 0.573, + "args": { + "External id": 974918,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936857215.787, "dur": 0.380, + "args": { + "External id": 974919,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936857217.912, "dur": 0.559, + "args": { + "External id": 974920,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936857220.202, "dur": 2.528, + "args": { + "External id": 974921,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936857240.569, "dur": 43.447, + "args": { + "External id": 974922,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345936857361.451, "dur": 419.287, + "args": { + "External id": 974923,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936857398.457, "dur": 376.476, + "args": { + "External id": 974924,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13968, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345936857410.951, "dur": 357.211, + "args": { + "External id": 974925,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936857808.120, "dur": 2.560, + "args": { + "External id": 974926,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13970, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.24)", "pid": 2338710, "tid": 2338710, + "ts": 6345936857912.845, "dur": 29868.339, + "args": { + "External id": 974927,"Record function id": 0, "Ev Idx": 13971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936858089.316, "dur": 9.262, + "args": { + "External id": 974928,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936858104.603, "dur": 1.017, + "args": { + "External id": 974929,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936858107.637, "dur": 3.388, + "args": { + "External id": 974930,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936858112.725, "dur": 0.880, + "args": { + "External id": 974931,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936858127.628, "dur": 1.024, + "args": { + "External id": 974932,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936858132.432, "dur": 1.105, + "args": { + "External id": 974933,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936858137.932, "dur": 0.946, + "args": { + "External id": 974934,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936858140.494, "dur": 2.451, + "args": { + "External id": 974935,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936858144.628, "dur": 1.008, + "args": { + "External id": 974936,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936858147.392, "dur": 0.909, + "args": { + "External id": 974937,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936858175.241, "dur": 29546.390, + "args": { + "External id": 974938,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936858194.376, "dur": 29516.075, + "args": { + "External id": 974939,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936858212.094, "dur": 21.509, + "args": { + "External id": 974940,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345936858237.827, "dur": 29424.833, + "args": { + "External id": 974941,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936858240.957, "dur": 29421.029, + "args": { + "External id": 974942,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936858247.135, "dur": 6.928, + "args": { + "External id": 974943,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936858255.914, "dur": 29400.920, + "args": { + "External id": 974944,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936887965.808, "dur": 59.611, + "args": { + "External id": 974945,"Sequence number": 10552291, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13989 + } + }, + { + "ph": "s", "id": 175, "pid": 2338710, "tid": 2338710, "ts": 6345936887965.808, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345936887991.074, "dur": 12.119, + "args": { + "External id": 974946,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936887996.524, "dur": 6.399, + "args": { + "External id": 974947,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345936888153.150, "dur": 85.037, + "args": { + "External id": 974948,"Record function id": 0, "Ev Idx": 13992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345936888239.811, "dur": 1370.930, + "args": { + "External id": 974949,"Record function id": 0, "Ev Idx": 13993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936888293.084, "dur": 1299.408, + "args": { + "External id": 974950,"Sequence number": 10552292, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13994 + } + }, + { + "ph": "s", "id": 174, "pid": 2338710, "tid": 2338710, "ts": 6345936888293.084, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936888384.871, "dur": 65.360, + "args": { + "External id": 974951,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936888468.996, "dur": 120.241, + "args": { + "External id": 974952,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936888605.805, "dur": 47.057, + "args": { + "External id": 974953,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936888662.895, "dur": 34.243, + "args": { + "External id": 974954,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936888732.884, "dur": 31.102, + "args": { + "External id": 974955,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345936888786.885, "dur": 19.539, + "args": { + "External id": 974956,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 14000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345936888833.025, "dur": 157.296, + "args": { + "External id": 974957,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 14001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936888891.326, "dur": 16.465, + "args": { + "External id": 974958,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936888899.172, "dur": 7.385, + "args": { + "External id": 974959,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936888912.350, "dur": 4.776, + "args": { + "External id": 974960,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936888918.783, "dur": 1.537, + "args": { + "External id": 974961,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936888923.457, "dur": 5.502, + "args": { + "External id": 974962,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936889003.319, "dur": 127.077, + "args": { + "External id": 974963,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 14007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345936889176.550, "dur": 36.764, + "args": { + "External id": 974964,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 14008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936889225.965, "dur": 57.262, + "args": { + "External id": 974965,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 14009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936889295.464, "dur": 43.429, + "args": { + "External id": 974966,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 14010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936889367.921, "dur": 33.921, + "args": { + "External id": 974967,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 14011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936889410.760, "dur": 44.761, + "args": { + "External id": 974968,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 14012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936889478.885, "dur": 22.955, + "args": { + "External id": 974969,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 14013 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.24)", "pid": 2338710, "tid": 2338710, + "ts": 6345936889695.555, "dur": 92.754, + "args": { + "External id": 974970,"Record function id": 0, "Ev Idx": 14014 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345936889880.465, "dur": 58.042, + "args": { + "External id": 974971,"Record function id": 0, "Ev Idx": 14015 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.25)", "pid": 2338710, "tid": 2338710, + "ts": 6345936889948.929, "dur": 31017.512, + "args": { + "External id": 974972,"Record function id": 0, "Ev Idx": 14016 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.25)", "pid": 2338710, "tid": 2338710, + "ts": 6345936889959.119, "dur": 1176.317, + "args": { + "External id": 974973,"Record function id": 0, "Ev Idx": 14017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936890123.133, "dur": 13.443, + "args": { + "External id": 974974,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936890154.912, "dur": 46.276, + "args": { + "External id": 974975,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 14019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936890161.717, "dur": 2.914, + "args": { + "External id": 974976,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936890168.822, "dur": 0.723, + "args": { + "External id": 974977,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936890171.545, "dur": 0.532, + "args": { + "External id": 974978,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936890178.382, "dur": 0.513, + "args": { + "External id": 974979,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936890181.828, "dur": 0.396, + "args": { + "External id": 974980,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936890183.649, "dur": 0.473, + "args": { + "External id": 974981,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936890185.723, "dur": 4.491, + "args": { + "External id": 974982,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936890191.981, "dur": 0.393, + "args": { + "External id": 974983,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936890193.954, "dur": 0.465, + "args": { + "External id": 974984,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936890215.082, "dur": 70.517, + "args": { + "External id": 974985,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 14029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345936890329.291, "dur": 145.869, + "args": { + "External id": 974986,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 14030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936890343.504, "dur": 5.200, + "args": { + "External id": 974987,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345936890355.833, "dur": 12.257, + "args": { + "External id": 974988,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 14032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345936890361.309, "dur": 6.268, + "args": { + "External id": 974989,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 14033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936890365.599, "dur": 0.601, + "args": { + "External id": 974990,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 14034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345936890376.763, "dur": 33.476, + "args": { + "External id": 974991,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 14035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936890379.366, "dur": 2.244, + "args": { + "External id": 974992,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936890383.535, "dur": 0.858, + "args": { + "External id": 974993,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936890386.307, "dur": 0.509, + "args": { + "External id": 974994,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936890390.274, "dur": 2.316, + "args": { + "External id": 974995,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936890394.291, "dur": 0.471, + "args": { + "External id": 974996,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936890396.700, "dur": 0.424, + "args": { + "External id": 974997,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936890399.473, "dur": 0.473, + "args": { + "External id": 974998,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936890401.941, "dur": 0.398, + "args": { + "External id": 974999,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936890404.143, "dur": 1.761, + "args": { + "External id": 975000,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936890425.796, "dur": 39.468, + "args": { + "External id": 975001,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 14045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345936890541.775, "dur": 412.097, + "args": { + "External id": 975002,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 14046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936890578.626, "dur": 369.768, + "args": { + "External id": 975003,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 14047, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345936890591.101, "dur": 351.119, + "args": { + "External id": 975004,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 14048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345936890983.047, "dur": 2.942, + "args": { + "External id": 975005,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 14049, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.25)", "pid": 2338710, "tid": 2338710, + "ts": 6345936891165.013, "dur": 29572.263, + "args": { + "External id": 975006,"Record function id": 0, "Ev Idx": 14050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936891288.099, "dur": 7.943, + "args": { + "External id": 975007,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 14051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936891300.304, "dur": 1.291, + "args": { + "External id": 975008,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936891303.584, "dur": 3.334, + "args": { + "External id": 975009,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 14053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936891308.796, "dur": 1.064, + "args": { + "External id": 975010,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 14054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936891311.352, "dur": 0.968, + "args": { + "External id": 975011,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 14055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936891313.961, "dur": 0.975, + "args": { + "External id": 975012,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 14056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936891316.671, "dur": 0.757, + "args": { + "External id": 975013,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936891319.265, "dur": 2.640, + "args": { + "External id": 975014,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 14058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936891323.723, "dur": 1.052, + "args": { + "External id": 975015,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 14059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936891328.573, "dur": 0.736, + "args": { + "External id": 975016,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 14060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936891351.505, "dur": 29331.701, + "args": { + "External id": 975017,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 14061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936891370.752, "dur": 29301.188, + "args": { + "External id": 975018,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 14062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936891395.101, "dur": 19.804, + "args": { + "External id": 975019,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345936891419.418, "dur": 29204.694, + "args": { + "External id": 975020,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 14064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936891422.370, "dur": 29200.425, + "args": { + "External id": 975021,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 14065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936891429.032, "dur": 6.377, + "args": { + "External id": 975022,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936891437.170, "dur": 29180.961, + "args": { + "External id": 975023,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 14067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936920898.617, "dur": 38.529, + "args": { + "External id": 975024,"Sequence number": 10552293, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 14068 + } + }, + { + "ph": "s", "id": 173, "pid": 2338710, "tid": 2338710, "ts": 6345936920898.617, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345936920919.300, "dur": 12.243, + "args": { + "External id": 975025,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 14069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936920924.952, "dur": 6.312, + "args": { + "External id": 975026,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345936921027.958, "dur": 120.958, + "args": { + "External id": 975027,"Record function id": 0, "Ev Idx": 14071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345936921152.152, "dur": 1317.320, + "args": { + "External id": 975028,"Record function id": 0, "Ev Idx": 14072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936921198.969, "dur": 1253.167, + "args": { + "External id": 975029,"Sequence number": 10552294, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 14073 + } + }, + { + "ph": "s", "id": 172, "pid": 2338710, "tid": 2338710, "ts": 6345936921198.969, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936921288.935, "dur": 63.668, + "args": { + "External id": 975030,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 14074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936921370.877, "dur": 120.847, + "args": { + "External id": 975031,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 14075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936921507.418, "dur": 42.787, + "args": { + "External id": 975032,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 14076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936921560.559, "dur": 34.773, + "args": { + "External id": 975033,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 14077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936921624.837, "dur": 28.782, + "args": { + "External id": 975034,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 14078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345936921682.834, "dur": 22.716, + "args": { + "External id": 975035,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 14079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345936921731.210, "dur": 153.666, + "args": { + "External id": 975036,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 14080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936921790.106, "dur": 14.791, + "args": { + "External id": 975037,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936921797.697, "dur": 6.080, + "args": { + "External id": 975038,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936921808.322, "dur": 5.030, + "args": { + "External id": 975039,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936921815.230, "dur": 1.517, + "args": { + "External id": 975040,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936921819.588, "dur": 5.786, + "args": { + "External id": 975041,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936921897.078, "dur": 55.782, + "args": { + "External id": 975042,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 14086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345936921988.327, "dur": 53.626, + "args": { + "External id": 975043,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 14087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936922095.719, "dur": 58.940, + "args": { + "External id": 975044,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 14088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936922168.774, "dur": 41.816, + "args": { + "External id": 975045,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 14089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936922237.535, "dur": 34.197, + "args": { + "External id": 975046,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 14090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936922280.598, "dur": 41.664, + "args": { + "External id": 975047,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 14091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345936922345.466, "dur": 20.929, + "args": { + "External id": 975048,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 14092 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.25)", "pid": 2338710, "tid": 2338710, + "ts": 6345936922550.395, "dur": 40.762, + "args": { + "External id": 975049,"Record function id": 0, "Ev Idx": 14093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936922747.203, "dur": 389.530, + "args": { + "External id": 975050,"Sequence number": 10552295, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [], [], [], [], []], "Ev Idx": 14094 + } + }, + { + "ph": "s", "id": 171, "pid": 2338710, "tid": 2338710, "ts": 6345936922747.203, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936922781.132, "dur": 9.438, + "args": { + "External id": 975051,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936922783.744, "dur": 6.528, + "args": { + "External id": 975052,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936922802.792, "dur": 14.224, + "args": { + "External id": 975053,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936922807.090, "dur": 9.140, + "args": { + "External id": 975054,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936922828.157, "dur": 6.113, + "args": { + "External id": 975055,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936923108.887, "dur": 9.084, + "args": { + "External id": 975056,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936923112.677, "dur": 4.798, + "args": { + "External id": 975057,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345936923169.765, "dur": 167.209, + "args": { + "External id": 975058,"Sequence number": 10552296, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 14102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345936923172.528, "dur": 16.184, + "args": { + "External id": 975059,"Sequence number": 10552296, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 14103 + } + }, + { + "ph": "s", "id": 170, "pid": 2338710, "tid": 2338710, "ts": 6345936923172.528, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345936923178.357, "dur": 8.413, + "args": { + "External id": 975060,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 14104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936923184.128, "dur": 2.233, + "args": { + "External id": 975061,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 14105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345936923191.997, "dur": 144.676, + "args": { + "External id": 975062,"Sequence number": 10552297, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 14106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936923195.424, "dur": 5.523, + "args": { + "External id": 975063,"Sequence number": 10552297, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936923196.292, "dur": 4.439, + "args": { + "External id": 975064,"Sequence number": 10552297, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14108 + } + }, + { + "ph": "s", "id": 169, "pid": 2338710, "tid": 2338710, "ts": 6345936923196.292, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936923202.678, "dur": 119.916, + "args": { + "External id": 975065,"Sequence number": 10552298, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 14109 + } + }, + { + "ph": "s", "id": 168, "pid": 2338710, "tid": 2338710, "ts": 6345936923202.678, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345936923326.688, "dur": 8.819, + "args": { + "External id": 975066,"Sequence number": 10552299, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14110 + } + }, + { + "ph": "s", "id": 167, "pid": 2338710, "tid": 2338710, "ts": 6345936923326.688, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345936923354.331, "dur": 79.676, + "args": { + "External id": 975067,"Sequence number": 10552300, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 14111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345936923355.138, "dur": 8.335, + "args": { + "External id": 975068,"Sequence number": 10552300, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 14112 + } + }, + { + "ph": "s", "id": 166, "pid": 2338710, "tid": 2338710, "ts": 6345936923355.138, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345936923357.293, "dur": 5.013, + "args": { + "External id": 975069,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 14113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936923361.164, "dur": 0.810, + "args": { + "External id": 975070,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 14114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345936923364.425, "dur": 69.232, + "args": { + "External id": 975071,"Sequence number": 10552301, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 14115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936923365.751, "dur": 4.994, + "args": { + "External id": 975072,"Sequence number": 10552301, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936923366.668, "dur": 3.894, + "args": { + "External id": 975073,"Sequence number": 10552301, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14117 + } + }, + { + "ph": "s", "id": 165, "pid": 2338710, "tid": 2338710, "ts": 6345936923366.668, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936923371.525, "dur": 54.803, + "args": { + "External id": 975074,"Sequence number": 10552302, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 14118 + } + }, + { + "ph": "s", "id": 164, "pid": 2338710, "tid": 2338710, "ts": 6345936923371.525, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345936923428.478, "dur": 4.702, + "args": { + "External id": 975075,"Sequence number": 10552303, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 14119 + } + }, + { + "ph": "s", "id": 163, "pid": 2338710, "tid": 2338710, "ts": 6345936923428.478, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345936923442.432, "dur": 69.985, + "args": { + "External id": 975076,"Sequence number": 10552304, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 14120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345936923443.182, "dur": 5.530, + "args": { + "External id": 975077,"Sequence number": 10552304, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 14121 + } + }, + { + "ph": "s", "id": 162, "pid": 2338710, "tid": 2338710, "ts": 6345936923443.182, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345936923444.932, "dur": 2.647, + "args": { + "External id": 975078,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 14122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936923446.767, "dur": 0.660, + "args": { + "External id": 975079,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 14123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345936923452.277, "dur": 59.837, + "args": { + "External id": 975080,"Sequence number": 10552305, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 14124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936923453.378, "dur": 4.928, + "args": { + "External id": 975081,"Sequence number": 10552305, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936923454.295, "dur": 3.846, + "args": { + "External id": 975082,"Sequence number": 10552305, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14126 + } + }, + { + "ph": "s", "id": 161, "pid": 2338710, "tid": 2338710, "ts": 6345936923454.295, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936923458.944, "dur": 44.319, + "args": { + "External id": 975083,"Sequence number": 10552306, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 14127 + } + }, + { + "ph": "s", "id": 160, "pid": 2338710, "tid": 2338710, "ts": 6345936923458.944, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345936923505.515, "dur": 6.110, + "args": { + "External id": 975084,"Sequence number": 10552307, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 14128 + } + }, + { + "ph": "s", "id": 159, "pid": 2338710, "tid": 2338710, "ts": 6345936923505.515, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936923537.447, "dur": 4.685, + "args": { + "External id": 975085,"Sequence number": 10552308, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936923538.979, "dur": 2.989, + "args": { + "External id": 975086,"Sequence number": 10552308, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14130 + } + }, + { + "ph": "s", "id": 158, "pid": 2338710, "tid": 2338710, "ts": 6345936923538.979, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936923551.068, "dur": 3.691, + "args": { + "External id": 975087,"Sequence number": 10552309, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936923552.628, "dur": 1.954, + "args": { + "External id": 975088,"Sequence number": 10552309, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14132 + } + }, + { + "ph": "s", "id": 157, "pid": 2338710, "tid": 2338710, "ts": 6345936923552.628, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936923561.739, "dur": 4.487, + "args": { + "External id": 975089,"Sequence number": 10552310, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936923563.091, "dur": 2.968, + "args": { + "External id": 975090,"Sequence number": 10552310, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14134 + } + }, + { + "ph": "s", "id": 156, "pid": 2338710, "tid": 2338710, "ts": 6345936923563.091, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936923606.835, "dur": 215.068, + "args": { + "External id": 975091,"Sequence number": 10552311, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 14135 + } + }, + { + "ph": "s", "id": 155, "pid": 2338710, "tid": 2338710, "ts": 6345936923606.835, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936923634.128, "dur": 11.848, + "args": { + "External id": 975092,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936923638.922, "dur": 6.463, + "args": { + "External id": 975093,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936923838.592, "dur": 138.515, + "args": { + "External id": 975094,"Sequence number": 10552312, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 14138 + } + }, + { + "ph": "s", "id": 154, "pid": 2338710, "tid": 2338710, "ts": 6345936923838.592, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936923856.068, "dur": 7.917, + "args": { + "External id": 975095,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 14139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936923858.922, "dur": 4.559, + "args": { + "External id": 975096,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 2338710, "tid": 2338710, + "ts": 6345936924033.721, "dur": 274.142, + "args": { + "External id": 975097,"Sequence number": 10552313, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], []], "Ev Idx": 14141 + } + }, + { + "ph": "s", "id": 153, "pid": 2338710, "tid": 2338710, "ts": 6345936924033.721, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345936924108.027, "dur": 162.973, + "args": { + "External id": 975098,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 14142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936924175.794, "dur": 11.228, + "args": { + "External id": 975099,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936924179.757, "dur": 6.530, + "args": { + "External id": 975100,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936924190.262, "dur": 5.051, + "args": { + "External id": 975101,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936924196.882, "dur": 1.244, + "args": { + "External id": 975102,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936924202.318, "dur": 3.690, + "args": { + "External id": 975103,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338710, "tid": 2338710, + "ts": 6345936924288.385, "dur": 6.423, + "args": { + "External id": 975104,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 14148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936924314.534, "dur": 7.474, + "args": { + "External id": 975105,"Sequence number": 10552314, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 14149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936924316.448, "dur": 5.411, + "args": { + "External id": 975106,"Sequence number": 10552314, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 14150 + } + }, + { + "ph": "s", "id": 152, "pid": 2338710, "tid": 2338710, "ts": 6345936924316.448, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345936924336.039, "dur": 148.341, + "args": { + "External id": 975107,"Sequence number": 10552315, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 14151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345936924339.808, "dur": 15.131, + "args": { + "External id": 975108,"Sequence number": 10552315, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 14152 + } + }, + { + "ph": "s", "id": 151, "pid": 2338710, "tid": 2338710, "ts": 6345936924339.808, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345936924342.882, "dur": 10.886, + "args": { + "External id": 975109,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 14153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936924351.407, "dur": 1.941, + "args": { + "External id": 975110,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 14154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345936924356.653, "dur": 127.426, + "args": { + "External id": 975111,"Sequence number": 10552316, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 14155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936924359.302, "dur": 7.580, + "args": { + "External id": 975112,"Sequence number": 10552316, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936924362.758, "dur": 3.956, + "args": { + "External id": 975113,"Sequence number": 10552316, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14157 + } + }, + { + "ph": "s", "id": 150, "pid": 2338710, "tid": 2338710, "ts": 6345936924362.758, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936924367.920, "dur": 106.524, + "args": { + "External id": 975114,"Sequence number": 10552317, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 14158 + } + }, + { + "ph": "s", "id": 149, "pid": 2338710, "tid": 2338710, "ts": 6345936924367.920, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345936924478.054, "dur": 5.038, + "args": { + "External id": 975115,"Sequence number": 10552318, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14159 + } + }, + { + "ph": "s", "id": 148, "pid": 2338710, "tid": 2338710, "ts": 6345936924478.054, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936924525.654, "dur": 266.784, + "args": { + "External id": 975116,"Sequence number": 10552319, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [8, 4096, 4096], [], [], [], []], "Ev Idx": 14160 + } + }, + { + "ph": "s", "id": 147, "pid": 2338710, "tid": 2338710, "ts": 6345936924525.654, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936924550.991, "dur": 2.827, + "args": { + "External id": 975117,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936924552.070, "dur": 1.474, + "args": { + "External id": 975118,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 2338710, "tid": 2338710, + "ts": 6345936924560.874, "dur": 6.091, + "args": { + "External id": 975119,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [4096, 1]], "Input Dims": [[8, 4096, 4096], [32768, 4096]], "Ev Idx": 14163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936924562.353, "dur": 4.459, + "args": { + "External id": 975120,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936924563.603, "dur": 3.102, + "args": { + "External id": 975121,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936924576.342, "dur": 10.334, + "args": { + "External id": 975122,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936924581.395, "dur": 4.810, + "args": { + "External id": 975123,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936924594.326, "dur": 3.076, + "args": { + "External id": 975124,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936924601.235, "dur": 3.459, + "args": { + "External id": 975125,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936924766.823, "dur": 3.594, + "args": { + "External id": 975126,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936924768.040, "dur": 2.094, + "args": { + "External id": 975127,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936924773.428, "dur": 4.829, + "args": { + "External id": 975128,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936924776.978, "dur": 1.157, + "args": { + "External id": 975129,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345936924815.428, "dur": 115.715, + "args": { + "External id": 975130,"Sequence number": 10552320, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 14174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345936924816.644, "dur": 8.600, + "args": { + "External id": 975131,"Sequence number": 10552320, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 14175 + } + }, + { + "ph": "s", "id": 146, "pid": 2338710, "tid": 2338710, "ts": 6345936924816.644, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345936924819.714, "dur": 4.261, + "args": { + "External id": 975132,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 14176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936924822.148, "dur": 1.560, + "args": { + "External id": 975133,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 14177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345936924826.286, "dur": 104.532, + "args": { + "External id": 975134,"Sequence number": 10552321, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 14178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936924830.822, "dur": 3.920, + "args": { + "External id": 975135,"Sequence number": 10552321, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936924831.846, "dur": 2.745, + "args": { + "External id": 975136,"Sequence number": 10552321, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14180 + } + }, + { + "ph": "s", "id": 145, "pid": 2338710, "tid": 2338710, "ts": 6345936924831.846, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936924835.720, "dur": 83.836, + "args": { + "External id": 975137,"Sequence number": 10552322, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 14181 + } + }, + { + "ph": "s", "id": 144, "pid": 2338710, "tid": 2338710, "ts": 6345936924835.720, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345936924922.183, "dur": 7.840, + "args": { + "External id": 975138,"Sequence number": 10552323, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 14182 + } + }, + { + "ph": "s", "id": 143, "pid": 2338710, "tid": 2338710, "ts": 6345936924922.183, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345936924942.895, "dur": 104.504, + "args": { + "External id": 975139,"Sequence number": 10552324, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 14183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345936924943.764, "dur": 6.558, + "args": { + "External id": 975140,"Sequence number": 10552324, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 14184 + } + }, + { + "ph": "s", "id": 142, "pid": 2338710, "tid": 2338710, "ts": 6345936924943.764, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345936924945.880, "dur": 3.034, + "args": { + "External id": 975141,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 14185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936924947.524, "dur": 1.192, + "args": { + "External id": 975142,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 14186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345936924951.027, "dur": 96.139, + "args": { + "External id": 975143,"Sequence number": 10552325, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 14187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936924954.821, "dur": 6.470, + "args": { + "External id": 975144,"Sequence number": 10552325, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936924955.869, "dur": 5.247, + "args": { + "External id": 975145,"Sequence number": 10552325, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14189 + } + }, + { + "ph": "s", "id": 141, "pid": 2338710, "tid": 2338710, "ts": 6345936924955.869, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936924961.818, "dur": 76.473, + "args": { + "External id": 975146,"Sequence number": 10552326, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 14190 + } + }, + { + "ph": "s", "id": 140, "pid": 2338710, "tid": 2338710, "ts": 6345936924961.818, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345936925042.091, "dur": 4.397, + "args": { + "External id": 975147,"Sequence number": 10552327, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 14191 + } + }, + { + "ph": "s", "id": 139, "pid": 2338710, "tid": 2338710, "ts": 6345936925042.091, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936925119.328, "dur": 206.024, + "args": { + "External id": 975148,"Sequence number": 10552328, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [8, 4096, 14336], [4096, 14336], []], "Ev Idx": 14192 + } + }, + { + "ph": "s", "id": 138, "pid": 2338710, "tid": 2338710, "ts": 6345936925119.328, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936925174.105, "dur": 6.389, + "args": { + "External id": 975149,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345936925222.670, "dur": 84.592, + "args": { + "External id": 975150,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [4096, 14336], []], "Ev Idx": 14194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345936925223.874, "dur": 7.393, + "args": { + "External id": 975151,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 14195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345936925225.922, "dur": 3.912, + "args": { + "External id": 975152,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 14196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936925228.130, "dur": 1.523, + "args": { + "External id": 975153,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 14197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345936925232.535, "dur": 74.228, + "args": { + "External id": 975154,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[58720256, 14336, 1], [1, 14336]], "Input Dims": [[8, 4096, 14336], [14336, 4096]], "Ev Idx": 14198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936925237.123, "dur": 3.114, + "args": { + "External id": 975155,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 14199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936925238.293, "dur": 1.819, + "args": { + "External id": 975156,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 14200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936925241.105, "dur": 61.329, + "args": { + "External id": 975157,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 14201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345936925304.604, "dur": 1.320, + "args": { + "External id": 975158,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338710, "tid": 2338710, + "ts": 6345936925338.244, "dur": 30.961, + "args": { + "External id": 975159,"Sequence number": 10552329, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 14203 + } + }, + { + "ph": "s", "id": 137, "pid": 2338710, "tid": 2338710, "ts": 6345936925338.244, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936925412.605, "dur": 224.427, + "args": { + "External id": 975160,"Sequence number": 10552330, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [], [], [], [], []], "Ev Idx": 14204 + } + }, + { + "ph": "s", "id": 136, "pid": 2338710, "tid": 2338710, "ts": 6345936925412.605, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936925437.590, "dur": 4.315, + "args": { + "External id": 975161,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936925438.831, "dur": 2.587, + "args": { + "External id": 975162,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936925451.778, "dur": 9.098, + "args": { + "External id": 975163,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936925455.298, "dur": 4.944, + "args": { + "External id": 975164,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936925468.564, "dur": 4.249, + "args": { + "External id": 975165,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936925620.644, "dur": 3.342, + "args": { + "External id": 975166,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936925621.855, "dur": 1.861, + "args": { + "External id": 975167,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345936925656.949, "dur": 101.596, + "args": { + "External id": 975168,"Sequence number": 10552331, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 14212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345936925660.652, "dur": 8.199, + "args": { + "External id": 975169,"Sequence number": 10552331, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 14213 + } + }, + { + "ph": "s", "id": 135, "pid": 2338710, "tid": 2338710, "ts": 6345936925660.652, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345936925663.823, "dur": 3.770, + "args": { + "External id": 975170,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 14214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936925665.873, "dur": 1.512, + "args": { + "External id": 975171,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 14215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345936925669.790, "dur": 88.484, + "args": { + "External id": 975172,"Sequence number": 10552332, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 14216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936925671.551, "dur": 8.974, + "args": { + "External id": 975173,"Sequence number": 10552332, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936925674.874, "dur": 5.484, + "args": { + "External id": 975174,"Sequence number": 10552332, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14218 + } + }, + { + "ph": "s", "id": 134, "pid": 2338710, "tid": 2338710, "ts": 6345936925674.874, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936925681.426, "dur": 70.902, + "args": { + "External id": 975175,"Sequence number": 10552333, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 14219 + } + }, + { + "ph": "s", "id": 133, "pid": 2338710, "tid": 2338710, "ts": 6345936925681.426, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345936925755.204, "dur": 2.363, + "args": { + "External id": 975176,"Sequence number": 10552334, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14220 + } + }, + { + "ph": "s", "id": 132, "pid": 2338710, "tid": 2338710, "ts": 6345936925755.204, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345936925767.764, "dur": 77.825, + "args": { + "External id": 975177,"Sequence number": 10552335, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 14221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345936925768.591, "dur": 10.659, + "args": { + "External id": 975178,"Sequence number": 10552335, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 14222 + } + }, + { + "ph": "s", "id": 131, "pid": 2338710, "tid": 2338710, "ts": 6345936925768.591, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345936925774.926, "dur": 3.173, + "args": { + "External id": 975179,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 14223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936925777.112, "dur": 0.811, + "args": { + "External id": 975180,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 14224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345936925780.005, "dur": 65.348, + "args": { + "External id": 975181,"Sequence number": 10552336, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 14225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936925781.152, "dur": 5.911, + "args": { + "External id": 975182,"Sequence number": 10552336, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936925784.476, "dur": 2.456, + "args": { + "External id": 975183,"Sequence number": 10552336, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14227 + } + }, + { + "ph": "s", "id": 130, "pid": 2338710, "tid": 2338710, "ts": 6345936925784.476, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936925787.748, "dur": 49.120, + "args": { + "External id": 975184,"Sequence number": 10552337, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 14228 + } + }, + { + "ph": "s", "id": 129, "pid": 2338710, "tid": 2338710, "ts": 6345936925787.748, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345936925839.181, "dur": 5.478, + "args": { + "External id": 975185,"Sequence number": 10552338, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 14229 + } + }, + { + "ph": "s", "id": 128, "pid": 2338710, "tid": 2338710, "ts": 6345936925839.181, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345936925854.213, "dur": 68.720, + "args": { + "External id": 975186,"Sequence number": 10552339, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 14230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345936925854.761, "dur": 9.742, + "args": { + "External id": 975187,"Sequence number": 10552339, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 14231 + } + }, + { + "ph": "s", "id": 127, "pid": 2338710, "tid": 2338710, "ts": 6345936925854.761, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345936925857.085, "dur": 6.063, + "args": { + "External id": 975188,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 14232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936925861.457, "dur": 1.402, + "args": { + "External id": 975189,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 14233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345936925865.162, "dur": 57.491, + "args": { + "External id": 975190,"Sequence number": 10552340, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 14234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936925866.360, "dur": 7.110, + "args": { + "External id": 975191,"Sequence number": 10552340, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936925867.526, "dur": 5.802, + "args": { + "External id": 975192,"Sequence number": 10552340, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14236 + } + }, + { + "ph": "s", "id": 126, "pid": 2338710, "tid": 2338710, "ts": 6345936925867.526, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936925876.561, "dur": 40.382, + "args": { + "External id": 975193,"Sequence number": 10552341, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 14237 + } + }, + { + "ph": "s", "id": 125, "pid": 2338710, "tid": 2338710, "ts": 6345936925876.561, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345936925918.995, "dur": 3.217, + "args": { + "External id": 975194,"Sequence number": 10552342, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 14238 + } + }, + { + "ph": "s", "id": 124, "pid": 2338710, "tid": 2338710, "ts": 6345936925918.995, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936925941.091, "dur": 3.961, + "args": { + "External id": 975195,"Sequence number": 10552343, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936925942.212, "dur": 2.688, + "args": { + "External id": 975196,"Sequence number": 10552343, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14240 + } + }, + { + "ph": "s", "id": 123, "pid": 2338710, "tid": 2338710, "ts": 6345936925942.212, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936925952.776, "dur": 5.744, + "args": { + "External id": 975197,"Sequence number": 10552344, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936925956.280, "dur": 2.109, + "args": { + "External id": 975198,"Sequence number": 10552344, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14242 + } + }, + { + "ph": "s", "id": 122, "pid": 2338710, "tid": 2338710, "ts": 6345936925956.280, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936925963.354, "dur": 3.588, + "args": { + "External id": 975199,"Sequence number": 10552345, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936925965.107, "dur": 1.705, + "args": { + "External id": 975200,"Sequence number": 10552345, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14244 + } + }, + { + "ph": "s", "id": 121, "pid": 2338710, "tid": 2338710, "ts": 6345936925965.107, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936926000.126, "dur": 264.118, + "args": { + "External id": 975201,"Sequence number": 10552346, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 14245 + } + }, + { + "ph": "s", "id": 120, "pid": 2338710, "tid": 2338710, "ts": 6345936926000.126, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936926043.780, "dur": 53.195, + "args": { + "External id": 975202,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936926047.399, "dur": 48.199, + "args": { + "External id": 975203,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936926283.975, "dur": 128.320, + "args": { + "External id": 975204,"Sequence number": 10552347, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 14248 + } + }, + { + "ph": "s", "id": 119, "pid": 2338710, "tid": 2338710, "ts": 6345936926283.975, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936926301.080, "dur": 9.568, + "args": { + "External id": 975205,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 14249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936926304.431, "dur": 5.753, + "args": { + "External id": 975206,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 2338710, "tid": 2338710, + "ts": 6345936926447.627, "dur": 214.032, + "args": { + "External id": 975207,"Sequence number": 10552348, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], []], "Ev Idx": 14251 + } + }, + { + "ph": "s", "id": 118, "pid": 2338710, "tid": 2338710, "ts": 6345936926447.627, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345936926487.220, "dur": 146.928, + "args": { + "External id": 975208,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 14252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936926544.474, "dur": 9.405, + "args": { + "External id": 975209,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936926548.554, "dur": 4.644, + "args": { + "External id": 975210,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936926556.931, "dur": 4.551, + "args": { + "External id": 975211,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936926562.812, "dur": 1.607, + "args": { + "External id": 975212,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936926567.152, "dur": 3.688, + "args": { + "External id": 975213,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338710, "tid": 2338710, + "ts": 6345936926646.698, "dur": 5.261, + "args": { + "External id": 975214,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 14258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936926668.386, "dur": 8.368, + "args": { + "External id": 975215,"Sequence number": 10552349, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 14259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936926670.241, "dur": 6.350, + "args": { + "External id": 975216,"Sequence number": 10552349, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 14260 + } + }, + { + "ph": "s", "id": 117, "pid": 2338710, "tid": 2338710, "ts": 6345936926670.241, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345936926690.227, "dur": 130.380, + "args": { + "External id": 975217,"Sequence number": 10552350, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 14261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345936926691.827, "dur": 9.642, + "args": { + "External id": 975218,"Sequence number": 10552350, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 14262 + } + }, + { + "ph": "s", "id": 116, "pid": 2338710, "tid": 2338710, "ts": 6345936926691.827, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345936926694.786, "dur": 5.523, + "args": { + "External id": 975219,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 14263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936926697.779, "dur": 2.136, + "args": { + "External id": 975220,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 14264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345936926703.168, "dur": 117.140, + "args": { + "External id": 975221,"Sequence number": 10552351, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 14265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936926707.738, "dur": 3.613, + "args": { + "External id": 975222,"Sequence number": 10552351, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936926708.768, "dur": 2.448, + "args": { + "External id": 975223,"Sequence number": 10552351, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14267 + } + }, + { + "ph": "s", "id": 115, "pid": 2338710, "tid": 2338710, "ts": 6345936926708.768, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936926712.408, "dur": 98.182, + "args": { + "External id": 975224,"Sequence number": 10552352, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 14268 + } + }, + { + "ph": "s", "id": 114, "pid": 2338710, "tid": 2338710, "ts": 6345936926712.408, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345936926813.521, "dur": 6.008, + "args": { + "External id": 975225,"Sequence number": 10552353, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14269 + } + }, + { + "ph": "s", "id": 113, "pid": 2338710, "tid": 2338710, "ts": 6345936926813.521, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936926871.186, "dur": 319.706, + "args": { + "External id": 975226,"Sequence number": 10552354, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [8, 4096, 4096], [], [], [], []], "Ev Idx": 14270 + } + }, + { + "ph": "s", "id": 112, "pid": 2338710, "tid": 2338710, "ts": 6345936926871.186, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936926892.200, "dur": 2.666, + "args": { + "External id": 975227,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936926892.955, "dur": 1.760, + "args": { + "External id": 975228,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 2338710, "tid": 2338710, + "ts": 6345936926899.409, "dur": 5.975, + "args": { + "External id": 975229,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [4096, 1]], "Input Dims": [[8, 4096, 4096], [32768, 4096]], "Ev Idx": 14273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936926903.204, "dur": 2.056, + "args": { + "External id": 975230,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936926904.099, "dur": 1.050, + "args": { + "External id": 975231,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936926914.108, "dur": 7.588, + "args": { + "External id": 975232,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936926916.603, "dur": 4.598, + "args": { + "External id": 975233,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936926928.974, "dur": 2.762, + "args": { + "External id": 975234,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936926935.623, "dur": 6.416, + "args": { + "External id": 975235,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936927163.178, "dur": 5.409, + "args": { + "External id": 975236,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936927164.583, "dur": 3.399, + "args": { + "External id": 975237,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936927172.458, "dur": 2.381, + "args": { + "External id": 975238,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936927173.499, "dur": 1.204, + "args": { + "External id": 975239,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345936927214.502, "dur": 126.701, + "args": { + "External id": 975240,"Sequence number": 10552355, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 14284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345936927216.107, "dur": 13.470, + "args": { + "External id": 975241,"Sequence number": 10552355, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 14285 + } + }, + { + "ph": "s", "id": 111, "pid": 2338710, "tid": 2338710, "ts": 6345936927216.107, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345936927219.308, "dur": 8.731, + "args": { + "External id": 975242,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 14286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936927224.563, "dur": 3.080, + "args": { + "External id": 975243,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 14287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345936927230.836, "dur": 110.082, + "args": { + "External id": 975244,"Sequence number": 10552356, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 14288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936927233.007, "dur": 5.862, + "args": { + "External id": 975245,"Sequence number": 10552356, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936927233.799, "dur": 4.713, + "args": { + "External id": 975246,"Sequence number": 10552356, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14290 + } + }, + { + "ph": "s", "id": 110, "pid": 2338710, "tid": 2338710, "ts": 6345936927233.799, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936927242.145, "dur": 90.485, + "args": { + "External id": 975247,"Sequence number": 10552357, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 14291 + } + }, + { + "ph": "s", "id": 109, "pid": 2338710, "tid": 2338710, "ts": 6345936927242.145, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345936927335.205, "dur": 4.991, + "args": { + "External id": 975248,"Sequence number": 10552358, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 14292 + } + }, + { + "ph": "s", "id": 108, "pid": 2338710, "tid": 2338710, "ts": 6345936927335.205, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345936927350.570, "dur": 97.817, + "args": { + "External id": 975249,"Sequence number": 10552359, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 14293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345936927351.175, "dur": 8.508, + "args": { + "External id": 975250,"Sequence number": 10552359, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 14294 + } + }, + { + "ph": "s", "id": 107, "pid": 2338710, "tid": 2338710, "ts": 6345936927351.175, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345936927353.040, "dur": 5.275, + "args": { + "External id": 975251,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 14295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936927356.953, "dur": 0.964, + "args": { + "External id": 975252,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 14296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345936927369.288, "dur": 78.875, + "args": { + "External id": 975253,"Sequence number": 10552360, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 14297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936927374.584, "dur": 6.580, + "args": { + "External id": 975254,"Sequence number": 10552360, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936927375.769, "dur": 5.226, + "args": { + "External id": 975255,"Sequence number": 10552360, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14299 + } + }, + { + "ph": "s", "id": 106, "pid": 2338710, "tid": 2338710, "ts": 6345936927375.769, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936927381.845, "dur": 59.686, + "args": { + "External id": 975256,"Sequence number": 10552361, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 14300 + } + }, + { + "ph": "s", "id": 105, "pid": 2338710, "tid": 2338710, "ts": 6345936927381.845, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345936927443.699, "dur": 3.994, + "args": { + "External id": 975257,"Sequence number": 10552362, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 14301 + } + }, + { + "ph": "s", "id": 104, "pid": 2338710, "tid": 2338710, "ts": 6345936927443.699, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936927473.785, "dur": 171.288, + "args": { + "External id": 975258,"Sequence number": 10552363, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [8, 4096, 14336], [4096, 14336], []], "Ev Idx": 14302 + } + }, + { + "ph": "s", "id": 103, "pid": 2338710, "tid": 2338710, "ts": 6345936927473.785, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936927518.519, "dur": 5.380, + "args": { + "External id": 975259,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345936927566.192, "dur": 65.581, + "args": { + "External id": 975260,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [4096, 14336], []], "Ev Idx": 14304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345936927567.119, "dur": 5.110, + "args": { + "External id": 975261,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 14305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345936927568.374, "dur": 3.004, + "args": { + "External id": 975262,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 14306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936927570.279, "dur": 0.884, + "args": { + "External id": 975263,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 14307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345936927573.063, "dur": 58.395, + "args": { + "External id": 975264,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[58720256, 14336, 1], [1, 14336]], "Input Dims": [[8, 4096, 14336], [14336, 4096]], "Ev Idx": 14308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936927574.459, "dur": 2.259, + "args": { + "External id": 975265,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 14309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936927575.262, "dur": 1.334, + "args": { + "External id": 975266,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 14310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936927577.311, "dur": 49.538, + "args": { + "External id": 975267,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 14311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345936927629.643, "dur": 1.257, + "args": { + "External id": 975268,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338710, "tid": 2338710, + "ts": 6345936927655.387, "dur": 28.350, + "args": { + "External id": 975269,"Sequence number": 10552364, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 14313 + } + }, + { + "ph": "s", "id": 102, "pid": 2338710, "tid": 2338710, "ts": 6345936927655.387, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936927726.525, "dur": 214.409, + "args": { + "External id": 975270,"Sequence number": 10552365, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [], [], [], [], []], "Ev Idx": 14314 + } + }, + { + "ph": "s", "id": 101, "pid": 2338710, "tid": 2338710, "ts": 6345936927726.525, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936927747.309, "dur": 3.259, + "args": { + "External id": 975271,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936927748.219, "dur": 2.080, + "args": { + "External id": 975272,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936927760.042, "dur": 8.103, + "args": { + "External id": 975273,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936927763.053, "dur": 4.688, + "args": { + "External id": 975274,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936927775.711, "dur": 6.043, + "args": { + "External id": 975275,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936927925.350, "dur": 3.034, + "args": { + "External id": 975276,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936927926.539, "dur": 1.583, + "args": { + "External id": 975277,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345936927960.787, "dur": 164.699, + "args": { + "External id": 975278,"Sequence number": 10552366, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 14322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345936927961.820, "dur": 7.545, + "args": { + "External id": 975279,"Sequence number": 10552366, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 14323 + } + }, + { + "ph": "s", "id": 100, "pid": 2338710, "tid": 2338710, "ts": 6345936927961.820, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345936927964.519, "dur": 3.306, + "args": { + "External id": 975280,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 14324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936927966.187, "dur": 1.387, + "args": { + "External id": 975281,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 14325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345936927972.928, "dur": 152.028, + "args": { + "External id": 975282,"Sequence number": 10552367, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 14326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936927974.659, "dur": 3.983, + "args": { + "External id": 975283,"Sequence number": 10552367, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936927975.622, "dur": 2.870, + "args": { + "External id": 975284,"Sequence number": 10552367, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14328 + } + }, + { + "ph": "s", "id": 99, "pid": 2338710, "tid": 2338710, "ts": 6345936927975.622, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936927979.423, "dur": 132.567, + "args": { + "External id": 975285,"Sequence number": 10552368, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 14329 + } + }, + { + "ph": "s", "id": 98, "pid": 2338710, "tid": 2338710, "ts": 6345936927979.423, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345936928116.389, "dur": 7.617, + "args": { + "External id": 975286,"Sequence number": 10552369, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14330 + } + }, + { + "ph": "s", "id": 97, "pid": 2338710, "tid": 2338710, "ts": 6345936928116.389, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345936928137.730, "dur": 79.964, + "args": { + "External id": 975287,"Sequence number": 10552370, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 14331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345936928138.785, "dur": 7.543, + "args": { + "External id": 975288,"Sequence number": 10552370, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 14332 + } + }, + { + "ph": "s", "id": 96, "pid": 2338710, "tid": 2338710, "ts": 6345936928138.785, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345936928140.929, "dur": 3.706, + "args": { + "External id": 975289,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 14333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936928143.214, "dur": 1.224, + "args": { + "External id": 975290,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 14334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345936928147.194, "dur": 70.205, + "args": { + "External id": 975291,"Sequence number": 10552371, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 14335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936928150.919, "dur": 4.510, + "args": { + "External id": 975292,"Sequence number": 10552371, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936928151.868, "dur": 3.407, + "args": { + "External id": 975293,"Sequence number": 10552371, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14337 + } + }, + { + "ph": "s", "id": 95, "pid": 2338710, "tid": 2338710, "ts": 6345936928151.868, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936928156.326, "dur": 55.902, + "args": { + "External id": 975294,"Sequence number": 10552372, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 14338 + } + }, + { + "ph": "s", "id": 94, "pid": 2338710, "tid": 2338710, "ts": 6345936928156.326, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345936928214.249, "dur": 2.737, + "args": { + "External id": 975295,"Sequence number": 10552373, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 14339 + } + }, + { + "ph": "s", "id": 93, "pid": 2338710, "tid": 2338710, "ts": 6345936928214.249, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345936928226.106, "dur": 68.674, + "args": { + "External id": 975296,"Sequence number": 10552374, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 14340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345936928229.549, "dur": 5.943, + "args": { + "External id": 975297,"Sequence number": 10552374, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 14341 + } + }, + { + "ph": "s", "id": 92, "pid": 2338710, "tid": 2338710, "ts": 6345936928229.549, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345936928231.382, "dur": 2.714, + "args": { + "External id": 975298,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 14342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936928233.048, "dur": 0.859, + "args": { + "External id": 975299,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 14343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345936928236.150, "dur": 58.336, + "args": { + "External id": 975300,"Sequence number": 10552375, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 14344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936928237.380, "dur": 8.063, + "args": { + "External id": 975301,"Sequence number": 10552375, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936928240.456, "dur": 4.735, + "args": { + "External id": 975302,"Sequence number": 10552375, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14346 + } + }, + { + "ph": "s", "id": 91, "pid": 2338710, "tid": 2338710, "ts": 6345936928240.456, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936928246.333, "dur": 40.351, + "args": { + "External id": 975303,"Sequence number": 10552376, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 14347 + } + }, + { + "ph": "s", "id": 90, "pid": 2338710, "tid": 2338710, "ts": 6345936928246.333, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345936928288.506, "dur": 5.557, + "args": { + "External id": 975304,"Sequence number": 10552377, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 14348 + } + }, + { + "ph": "s", "id": 89, "pid": 2338710, "tid": 2338710, "ts": 6345936928288.506, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936928313.684, "dur": 6.690, + "args": { + "External id": 975305,"Sequence number": 10552378, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936928317.449, "dur": 2.775, + "args": { + "External id": 975306,"Sequence number": 10552378, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14350 + } + }, + { + "ph": "s", "id": 88, "pid": 2338710, "tid": 2338710, "ts": 6345936928317.449, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936928328.862, "dur": 3.416, + "args": { + "External id": 975307,"Sequence number": 10552379, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936928330.071, "dur": 2.075, + "args": { + "External id": 975308,"Sequence number": 10552379, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14352 + } + }, + { + "ph": "s", "id": 87, "pid": 2338710, "tid": 2338710, "ts": 6345936928330.071, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936928337.819, "dur": 5.419, + "args": { + "External id": 975309,"Sequence number": 10552380, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936928339.143, "dur": 3.964, + "args": { + "External id": 975310,"Sequence number": 10552380, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14354 + } + }, + { + "ph": "s", "id": 86, "pid": 2338710, "tid": 2338710, "ts": 6345936928339.143, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936928380.163, "dur": 183.971, + "args": { + "External id": 975311,"Sequence number": 10552381, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 14355 + } + }, + { + "ph": "s", "id": 85, "pid": 2338710, "tid": 2338710, "ts": 6345936928380.163, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936928402.011, "dur": 10.158, + "args": { + "External id": 975312,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936928405.153, "dur": 6.556, + "args": { + "External id": 975313,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936928579.734, "dur": 121.393, + "args": { + "External id": 975314,"Sequence number": 10552382, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 14358 + } + }, + { + "ph": "s", "id": 84, "pid": 2338710, "tid": 2338710, "ts": 6345936928579.734, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936928594.803, "dur": 7.653, + "args": { + "External id": 975315,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 14359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936928597.365, "dur": 4.595, + "args": { + "External id": 975316,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 2338710, "tid": 2338710, + "ts": 6345936928734.923, "dur": 206.176, + "args": { + "External id": 975317,"Sequence number": 10552383, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], []], "Ev Idx": 14361 + } + }, + { + "ph": "s", "id": 83, "pid": 2338710, "tid": 2338710, "ts": 6345936928734.923, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345936928765.021, "dur": 148.401, + "args": { + "External id": 975318,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 14362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936928822.256, "dur": 7.852, + "args": { + "External id": 975319,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936928825.189, "dur": 4.249, + "args": { + "External id": 975320,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936928833.331, "dur": 6.842, + "args": { + "External id": 975321,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936928841.944, "dur": 1.445, + "args": { + "External id": 975322,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936928848.448, "dur": 3.637, + "args": { + "External id": 975323,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338710, "tid": 2338710, + "ts": 6345936928926.300, "dur": 5.250, + "args": { + "External id": 975324,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 14368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936928947.953, "dur": 5.633, + "args": { + "External id": 975325,"Sequence number": 10552384, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 14369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936928949.466, "dur": 3.921, + "args": { + "External id": 975326,"Sequence number": 10552384, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 14370 + } + }, + { + "ph": "s", "id": 82, "pid": 2338710, "tid": 2338710, "ts": 6345936928949.466, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345936928967.082, "dur": 215.548, + "args": { + "External id": 975327,"Sequence number": 10552385, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 14371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345936928968.464, "dur": 14.391, + "args": { + "External id": 975328,"Sequence number": 10552385, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 14372 + } + }, + { + "ph": "s", "id": 81, "pid": 2338710, "tid": 2338710, "ts": 6345936928968.464, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345936928974.368, "dur": 7.071, + "args": { + "External id": 975329,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 14373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936928979.280, "dur": 1.800, + "args": { + "External id": 975330,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 14374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345936928984.404, "dur": 197.478, + "args": { + "External id": 975331,"Sequence number": 10552386, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 14375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936928986.675, "dur": 4.894, + "args": { + "External id": 975332,"Sequence number": 10552386, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936928987.856, "dur": 3.557, + "args": { + "External id": 975333,"Sequence number": 10552386, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14377 + } + }, + { + "ph": "s", "id": 80, "pid": 2338710, "tid": 2338710, "ts": 6345936928987.856, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936928994.990, "dur": 173.290, + "args": { + "External id": 975334,"Sequence number": 10552387, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 14378 + } + }, + { + "ph": "s", "id": 79, "pid": 2338710, "tid": 2338710, "ts": 6345936928994.990, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345936929173.576, "dur": 7.200, + "args": { + "External id": 975335,"Sequence number": 10552388, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14379 + } + }, + { + "ph": "s", "id": 78, "pid": 2338710, "tid": 2338710, "ts": 6345936929173.576, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936929230.103, "dur": 262.355, + "args": { + "External id": 975336,"Sequence number": 10552389, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [8, 4096, 4096], [], [], [], []], "Ev Idx": 14380 + } + }, + { + "ph": "s", "id": 77, "pid": 2338710, "tid": 2338710, "ts": 6345936929230.103, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936929253.197, "dur": 6.390, + "args": { + "External id": 975337,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936929256.804, "dur": 2.562, + "args": { + "External id": 975338,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 2338710, "tid": 2338710, + "ts": 6345936929264.444, "dur": 3.713, + "args": { + "External id": 975339,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [4096, 1]], "Input Dims": [[8, 4096, 4096], [32768, 4096]], "Ev Idx": 14383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936929265.779, "dur": 2.243, + "args": { + "External id": 975340,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936929266.740, "dur": 1.168, + "args": { + "External id": 975341,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936929278.012, "dur": 11.731, + "args": { + "External id": 975342,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936929280.644, "dur": 8.725, + "args": { + "External id": 975343,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936929300.096, "dur": 3.597, + "args": { + "External id": 975344,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936929307.663, "dur": 4.284, + "args": { + "External id": 975345,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936929466.954, "dur": 4.016, + "args": { + "External id": 975346,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936929468.441, "dur": 2.169, + "args": { + "External id": 975347,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936929473.760, "dur": 2.460, + "args": { + "External id": 975348,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936929475.070, "dur": 1.045, + "args": { + "External id": 975349,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345936929513.743, "dur": 131.201, + "args": { + "External id": 975350,"Sequence number": 10552390, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 14394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345936929515.218, "dur": 22.574, + "args": { + "External id": 975351,"Sequence number": 10552390, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 14395 + } + }, + { + "ph": "s", "id": 76, "pid": 2338710, "tid": 2338710, "ts": 6345936929515.218, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345936929520.150, "dur": 16.167, + "args": { + "External id": 975352,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 14396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936929533.917, "dur": 2.100, + "args": { + "External id": 975353,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 14397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345936929538.980, "dur": 105.587, + "args": { + "External id": 975354,"Sequence number": 10552391, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 14398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936929540.997, "dur": 7.047, + "args": { + "External id": 975355,"Sequence number": 10552391, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936929544.224, "dur": 3.668, + "args": { + "External id": 975356,"Sequence number": 10552391, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14400 + } + }, + { + "ph": "s", "id": 75, "pid": 2338710, "tid": 2338710, "ts": 6345936929544.224, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936929549.190, "dur": 87.351, + "args": { + "External id": 975357,"Sequence number": 10552392, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 14401 + } + }, + { + "ph": "s", "id": 74, "pid": 2338710, "tid": 2338710, "ts": 6345936929549.190, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345936929639.394, "dur": 4.508, + "args": { + "External id": 975358,"Sequence number": 10552393, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 14402 + } + }, + { + "ph": "s", "id": 73, "pid": 2338710, "tid": 2338710, "ts": 6345936929639.394, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345936929653.825, "dur": 156.401, + "args": { + "External id": 975359,"Sequence number": 10552394, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 14403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345936929654.424, "dur": 83.896, + "args": { + "External id": 975360,"Sequence number": 10552394, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 14404 + } + }, + { + "ph": "s", "id": 72, "pid": 2338710, "tid": 2338710, "ts": 6345936929654.424, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345936929728.864, "dur": 8.135, + "args": { + "External id": 975361,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 14405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936929733.269, "dur": 3.555, + "args": { + "External id": 975362,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 14406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345936929738.967, "dur": 70.874, + "args": { + "External id": 975363,"Sequence number": 10552395, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 14407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936929740.058, "dur": 7.476, + "args": { + "External id": 975364,"Sequence number": 10552395, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936929740.604, "dur": 6.748, + "args": { + "External id": 975365,"Sequence number": 10552395, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14409 + } + }, + { + "ph": "s", "id": 71, "pid": 2338710, "tid": 2338710, "ts": 6345936929740.604, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936929748.237, "dur": 55.031, + "args": { + "External id": 975366,"Sequence number": 10552396, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 14410 + } + }, + { + "ph": "s", "id": 70, "pid": 2338710, "tid": 2338710, "ts": 6345936929748.237, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345936929805.299, "dur": 3.725, + "args": { + "External id": 975367,"Sequence number": 10552397, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 14411 + } + }, + { + "ph": "s", "id": 69, "pid": 2338710, "tid": 2338710, "ts": 6345936929805.299, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936929835.429, "dur": 195.764, + "args": { + "External id": 975368,"Sequence number": 10552398, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [8, 4096, 14336], [4096, 14336], []], "Ev Idx": 14412 + } + }, + { + "ph": "s", "id": 68, "pid": 2338710, "tid": 2338710, "ts": 6345936929835.429, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936929882.458, "dur": 4.440, + "args": { + "External id": 975369,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345936929924.965, "dur": 70.840, + "args": { + "External id": 975370,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [4096, 14336], []], "Ev Idx": 14414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345936929925.610, "dur": 4.675, + "args": { + "External id": 975371,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 14415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345936929926.850, "dur": 2.555, + "args": { + "External id": 975372,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 14416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936929928.405, "dur": 0.726, + "args": { + "External id": 975373,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 14417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345936929930.937, "dur": 64.488, + "args": { + "External id": 975374,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[58720256, 14336, 1], [1, 14336]], "Input Dims": [[8, 4096, 14336], [14336, 4096]], "Ev Idx": 14418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936929932.373, "dur": 4.729, + "args": { + "External id": 975375,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 14419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936929935.815, "dur": 1.170, + "args": { + "External id": 975376,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 14420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936929937.690, "dur": 51.101, + "args": { + "External id": 975377,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 14421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345936929991.347, "dur": 3.433, + "args": { + "External id": 975378,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338710, "tid": 2338710, + "ts": 6345936930045.615, "dur": 76.263, + "args": { + "External id": 975379,"Sequence number": 10552399, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 14423 + } + }, + { + "ph": "s", "id": 67, "pid": 2338710, "tid": 2338710, "ts": 6345936930045.615, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936930171.899, "dur": 226.771, + "args": { + "External id": 975380,"Sequence number": 10552400, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [], [], [], [], []], "Ev Idx": 14424 + } + }, + { + "ph": "s", "id": 66, "pid": 2338710, "tid": 2338710, "ts": 6345936930171.899, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936930196.481, "dur": 4.059, + "args": { + "External id": 975381,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936930197.423, "dur": 2.835, + "args": { + "External id": 975382,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936930210.773, "dur": 11.808, + "args": { + "External id": 975383,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936930216.720, "dur": 5.298, + "args": { + "External id": 975384,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936930230.192, "dur": 4.640, + "args": { + "External id": 975385,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936930381.793, "dur": 2.760, + "args": { + "External id": 975386,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936930382.387, "dur": 1.919, + "args": { + "External id": 975387,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345936930419.071, "dur": 115.456, + "args": { + "External id": 975388,"Sequence number": 10552401, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 14432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345936930420.111, "dur": 10.633, + "args": { + "External id": 975389,"Sequence number": 10552401, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 14433 + } + }, + { + "ph": "s", "id": 65, "pid": 2338710, "tid": 2338710, "ts": 6345936930420.111, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345936930422.828, "dur": 6.152, + "args": { + "External id": 975390,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 14434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936930427.460, "dur": 1.260, + "args": { + "External id": 975391,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 14435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345936930431.903, "dur": 102.237, + "args": { + "External id": 975392,"Sequence number": 10552402, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 14436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936930433.720, "dur": 3.953, + "args": { + "External id": 975393,"Sequence number": 10552402, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936930434.488, "dur": 2.988, + "args": { + "External id": 975394,"Sequence number": 10552402, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14438 + } + }, + { + "ph": "s", "id": 64, "pid": 2338710, "tid": 2338710, "ts": 6345936930434.488, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936930441.223, "dur": 84.884, + "args": { + "External id": 975395,"Sequence number": 10552403, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 14439 + } + }, + { + "ph": "s", "id": 63, "pid": 2338710, "tid": 2338710, "ts": 6345936930441.223, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345936930528.518, "dur": 4.912, + "args": { + "External id": 975396,"Sequence number": 10552404, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14440 + } + }, + { + "ph": "s", "id": 62, "pid": 2338710, "tid": 2338710, "ts": 6345936930528.518, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345936930548.278, "dur": 72.930, + "args": { + "External id": 975397,"Sequence number": 10552405, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 14441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345936930548.909, "dur": 8.527, + "args": { + "External id": 975398,"Sequence number": 10552405, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 14442 + } + }, + { + "ph": "s", "id": 61, "pid": 2338710, "tid": 2338710, "ts": 6345936930548.909, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345936930551.200, "dur": 4.969, + "args": { + "External id": 975399,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 14443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936930555.193, "dur": 0.820, + "args": { + "External id": 975400,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 14444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345936930558.387, "dur": 62.547, + "args": { + "External id": 975401,"Sequence number": 10552406, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 14445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936930559.287, "dur": 4.888, + "args": { + "External id": 975402,"Sequence number": 10552406, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936930560.166, "dur": 3.825, + "args": { + "External id": 975403,"Sequence number": 10552406, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14447 + } + }, + { + "ph": "s", "id": 60, "pid": 2338710, "tid": 2338710, "ts": 6345936930560.166, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936930564.830, "dur": 50.012, + "args": { + "External id": 975404,"Sequence number": 10552407, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 14448 + } + }, + { + "ph": "s", "id": 59, "pid": 2338710, "tid": 2338710, "ts": 6345936930564.830, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345936930616.806, "dur": 3.524, + "args": { + "External id": 975405,"Sequence number": 10552408, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 14449 + } + }, + { + "ph": "s", "id": 58, "pid": 2338710, "tid": 2338710, "ts": 6345936930616.806, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345936930628.894, "dur": 67.386, + "args": { + "External id": 975406,"Sequence number": 10552409, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 14450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345936930629.654, "dur": 7.624, + "args": { + "External id": 975407,"Sequence number": 10552409, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 14451 + } + }, + { + "ph": "s", "id": 57, "pid": 2338710, "tid": 2338710, "ts": 6345936930629.654, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345936930631.880, "dur": 4.205, + "args": { + "External id": 975408,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 14452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936930635.101, "dur": 0.849, + "args": { + "External id": 975409,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 14453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345936930638.305, "dur": 57.683, + "args": { + "External id": 975410,"Sequence number": 10552410, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 14454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936930641.454, "dur": 4.567, + "args": { + "External id": 975411,"Sequence number": 10552410, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936930642.064, "dur": 3.779, + "args": { + "External id": 975412,"Sequence number": 10552410, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14456 + } + }, + { + "ph": "s", "id": 56, "pid": 2338710, "tid": 2338710, "ts": 6345936930642.064, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936930646.976, "dur": 43.854, + "args": { + "External id": 975413,"Sequence number": 10552411, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 14457 + } + }, + { + "ph": "s", "id": 55, "pid": 2338710, "tid": 2338710, "ts": 6345936930646.976, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345936930692.871, "dur": 2.773, + "args": { + "External id": 975414,"Sequence number": 10552412, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 14458 + } + }, + { + "ph": "s", "id": 54, "pid": 2338710, "tid": 2338710, "ts": 6345936930692.871, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936930716.215, "dur": 4.470, + "args": { + "External id": 975415,"Sequence number": 10552413, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936930717.774, "dur": 2.766, + "args": { + "External id": 975416,"Sequence number": 10552413, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14460 + } + }, + { + "ph": "s", "id": 53, "pid": 2338710, "tid": 2338710, "ts": 6345936930717.774, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936930728.149, "dur": 5.142, + "args": { + "External id": 975417,"Sequence number": 10552414, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936930729.377, "dur": 3.776, + "args": { + "External id": 975418,"Sequence number": 10552414, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14462 + } + }, + { + "ph": "s", "id": 52, "pid": 2338710, "tid": 2338710, "ts": 6345936930729.377, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936930737.864, "dur": 5.239, + "args": { + "External id": 975419,"Sequence number": 10552415, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936930741.061, "dur": 1.918, + "args": { + "External id": 975420,"Sequence number": 10552415, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14464 + } + }, + { + "ph": "s", "id": 51, "pid": 2338710, "tid": 2338710, "ts": 6345936930741.061, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936930776.621, "dur": 158.684, + "args": { + "External id": 975421,"Sequence number": 10552416, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 14465 + } + }, + { + "ph": "s", "id": 50, "pid": 2338710, "tid": 2338710, "ts": 6345936930776.621, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936930796.476, "dur": 8.428, + "args": { + "External id": 975422,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936930799.481, "dur": 4.998, + "args": { + "External id": 975423,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936930948.759, "dur": 188.557, + "args": { + "External id": 975424,"Sequence number": 10552417, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 14468 + } + }, + { + "ph": "s", "id": 49, "pid": 2338710, "tid": 2338710, "ts": 6345936930948.759, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936930963.588, "dur": 7.273, + "args": { + "External id": 975425,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 14469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936930966.427, "dur": 4.069, + "args": { + "External id": 975426,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 2338710, "tid": 2338710, + "ts": 6345936931176.521, "dur": 210.904, + "args": { + "External id": 975427,"Sequence number": 10552418, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], []], "Ev Idx": 14471 + } + }, + { + "ph": "s", "id": 48, "pid": 2338710, "tid": 2338710, "ts": 6345936931176.521, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345936931207.273, "dur": 150.555, + "args": { + "External id": 975428,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 14472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936931269.002, "dur": 10.065, + "args": { + "External id": 975429,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936931272.116, "dur": 6.220, + "args": { + "External id": 975430,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936931282.516, "dur": 4.042, + "args": { + "External id": 975431,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936931288.158, "dur": 1.506, + "args": { + "External id": 975432,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936931292.658, "dur": 5.004, + "args": { + "External id": 975433,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338710, "tid": 2338710, + "ts": 6345936931371.003, "dur": 5.980, + "args": { + "External id": 975434,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 14478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936931393.717, "dur": 6.701, + "args": { + "External id": 975435,"Sequence number": 10552419, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 14479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936931395.235, "dur": 5.015, + "args": { + "External id": 975436,"Sequence number": 10552419, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 14480 + } + }, + { + "ph": "s", "id": 47, "pid": 2338710, "tid": 2338710, "ts": 6345936931395.235, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345936931414.222, "dur": 131.381, + "args": { + "External id": 975437,"Sequence number": 10552420, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 14481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345936931415.664, "dur": 12.703, + "args": { + "External id": 975438,"Sequence number": 10552420, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 14482 + } + }, + { + "ph": "s", "id": 46, "pid": 2338710, "tid": 2338710, "ts": 6345936931415.664, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345936931421.909, "dur": 5.100, + "args": { + "External id": 975439,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 14483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936931424.479, "dur": 2.162, + "args": { + "External id": 975440,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 14484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345936931430.036, "dur": 115.127, + "args": { + "External id": 975441,"Sequence number": 10552421, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 14485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936931432.340, "dur": 5.430, + "args": { + "External id": 975442,"Sequence number": 10552421, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936931432.986, "dur": 4.629, + "args": { + "External id": 975443,"Sequence number": 10552421, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14487 + } + }, + { + "ph": "s", "id": 45, "pid": 2338710, "tid": 2338710, "ts": 6345936931432.986, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936931438.916, "dur": 98.323, + "args": { + "External id": 975444,"Sequence number": 10552422, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 14488 + } + }, + { + "ph": "s", "id": 44, "pid": 2338710, "tid": 2338710, "ts": 6345936931438.916, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345936931540.292, "dur": 3.971, + "args": { + "External id": 975445,"Sequence number": 10552423, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14489 + } + }, + { + "ph": "s", "id": 43, "pid": 2338710, "tid": 2338710, "ts": 6345936931540.292, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936931584.256, "dur": 245.441, + "args": { + "External id": 975446,"Sequence number": 10552424, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [8, 4096, 4096], [], [], [], []], "Ev Idx": 14490 + } + }, + { + "ph": "s", "id": 42, "pid": 2338710, "tid": 2338710, "ts": 6345936931584.256, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936931605.730, "dur": 7.373, + "args": { + "External id": 975447,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936931609.125, "dur": 3.793, + "args": { + "External id": 975448,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 2338710, "tid": 2338710, + "ts": 6345936931617.422, "dur": 3.397, + "args": { + "External id": 975449,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [4096, 1]], "Input Dims": [[8, 4096, 4096], [32768, 4096]], "Ev Idx": 14493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936931618.696, "dur": 1.983, + "args": { + "External id": 975450,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936931619.650, "dur": 0.911, + "args": { + "External id": 975451,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936931628.841, "dur": 6.676, + "args": { + "External id": 975452,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936931631.030, "dur": 4.042, + "args": { + "External id": 975453,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936931645.306, "dur": 2.896, + "args": { + "External id": 975454,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936931652.438, "dur": 3.524, + "args": { + "External id": 975455,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936931801.446, "dur": 4.539, + "args": { + "External id": 975456,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936931802.388, "dur": 3.276, + "args": { + "External id": 975457,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936931808.553, "dur": 2.670, + "args": { + "External id": 975458,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936931810.058, "dur": 1.019, + "args": { + "External id": 975459,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345936931848.990, "dur": 108.635, + "args": { + "External id": 975460,"Sequence number": 10552425, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 14504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345936931850.054, "dur": 7.705, + "args": { + "External id": 975461,"Sequence number": 10552425, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 14505 + } + }, + { + "ph": "s", "id": 41, "pid": 2338710, "tid": 2338710, "ts": 6345936931850.054, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345936931852.549, "dur": 4.114, + "args": { + "External id": 975462,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 14506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936931854.767, "dur": 1.566, + "args": { + "External id": 975463,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 14507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345936931858.856, "dur": 98.471, + "args": { + "External id": 975464,"Sequence number": 10552426, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 14508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936931860.338, "dur": 8.040, + "args": { + "External id": 975465,"Sequence number": 10552426, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936931863.731, "dur": 4.469, + "args": { + "External id": 975466,"Sequence number": 10552426, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14510 + } + }, + { + "ph": "s", "id": 40, "pid": 2338710, "tid": 2338710, "ts": 6345936931863.731, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936931869.277, "dur": 82.098, + "args": { + "External id": 975467,"Sequence number": 10552427, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 14511 + } + }, + { + "ph": "s", "id": 39, "pid": 2338710, "tid": 2338710, "ts": 6345936931869.277, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345936931953.757, "dur": 2.921, + "args": { + "External id": 975468,"Sequence number": 10552428, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 14512 + } + }, + { + "ph": "s", "id": 38, "pid": 2338710, "tid": 2338710, "ts": 6345936931953.757, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345936931966.879, "dur": 153.709, + "args": { + "External id": 975469,"Sequence number": 10552429, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 14513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345936931967.577, "dur": 10.898, + "args": { + "External id": 975470,"Sequence number": 10552429, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 14514 + } + }, + { + "ph": "s", "id": 37, "pid": 2338710, "tid": 2338710, "ts": 6345936931967.577, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345936931972.909, "dur": 4.240, + "args": { + "External id": 975471,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 14515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936931976.113, "dur": 0.745, + "args": { + "External id": 975472,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 14516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345936931979.354, "dur": 140.887, + "args": { + "External id": 975473,"Sequence number": 10552430, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 14517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936931980.504, "dur": 7.346, + "args": { + "External id": 975474,"Sequence number": 10552430, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936931983.883, "dur": 3.655, + "args": { + "External id": 975475,"Sequence number": 10552430, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14519 + } + }, + { + "ph": "s", "id": 36, "pid": 2338710, "tid": 2338710, "ts": 6345936931983.883, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936931988.951, "dur": 121.453, + "args": { + "External id": 975476,"Sequence number": 10552431, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 14520 + } + }, + { + "ph": "s", "id": 35, "pid": 2338710, "tid": 2338710, "ts": 6345936931988.951, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345936932114.473, "dur": 5.087, + "args": { + "External id": 975477,"Sequence number": 10552432, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 14521 + } + }, + { + "ph": "s", "id": 34, "pid": 2338710, "tid": 2338710, "ts": 6345936932114.473, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936932148.051, "dur": 188.538, + "args": { + "External id": 975478,"Sequence number": 10552433, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [8, 4096, 14336], [4096, 14336], []], "Ev Idx": 14522 + } + }, + { + "ph": "s", "id": 33, "pid": 2338710, "tid": 2338710, "ts": 6345936932148.051, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936932197.201, "dur": 6.417, + "args": { + "External id": 975479,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345936932240.292, "dur": 80.367, + "args": { + "External id": 975480,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [4096, 14336], []], "Ev Idx": 14524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345936932241.254, "dur": 6.395, + "args": { + "External id": 975481,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 14525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345936932242.945, "dur": 3.454, + "args": { + "External id": 975482,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 14526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936932244.845, "dur": 1.187, + "args": { + "External id": 975483,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 14527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345936932248.730, "dur": 71.546, + "args": { + "External id": 975484,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[58720256, 14336, 1], [1, 14336]], "Input Dims": [[8, 4096, 14336], [14336, 4096]], "Ev Idx": 14528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345936932250.016, "dur": 7.577, + "args": { + "External id": 975485,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 14529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936932253.375, "dur": 4.062, + "args": { + "External id": 975486,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 14530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345936932258.451, "dur": 57.281, + "args": { + "External id": 975487,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 14531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345936932318.457, "dur": 1.193, + "args": { + "External id": 975488,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338710, "tid": 2338710, + "ts": 6345936932347.256, "dur": 27.168, + "args": { + "External id": 975489,"Sequence number": 10552434, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 14533 + } + }, + { + "ph": "s", "id": 32, "pid": 2338710, "tid": 2338710, "ts": 6345936932347.256, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::stack", "pid": 2338710, "tid": 2338710, + "ts": 6345936932397.191, "dur": 44.607, + "args": { + "External id": 975490,"Sequence number": 10552435, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "-2"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[[16777216, 4096, 1], [16777216, 4096, 1], [16777216, 4096, 1], [16777216, 4096, 1]], []], "Input Dims": [[[8, 4096, 4096], [8, 4096, 4096], [8, 4096, 4096], [8, 4096, 4096]], []], "Ev Idx": 14534 + } + }, + { + "ph": "s", "id": 31, "pid": 2338710, "tid": 2338710, "ts": 6345936932397.191, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::cat", "pid": 2338710, "tid": 2338710, + "ts": 6345936932408.198, "dur": 28.808, + "args": { + "External id": 975491,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[[16777216, 4096, 1], [16777216, 4096, 1], [16777216, 4096, 1], [16777216, 4096, 1]], []], "Input Dims": [[[8, 4096, 4096], [8, 4096, 4096], [8, 4096, 4096], [8, 4096, 4096]], []], "Ev Idx": 14535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936932438.653, "dur": 1.352, + "args": { + "External id": 975492,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 16384], []], "Ev Idx": 14536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345936932481.333, "dur": 53.974, + "args": { + "External id": 975493,"Record function id": 0, "Ev Idx": 14537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 2/0", "pid": 2338710, "tid": 2338710, + "ts": 6345936932538.182, "dur": 213.587, + "args": { + "External id": 975494,"Record function id": 0, "Ev Idx": 14538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936932576.700, "dur": 165.521, + "args": { + "External id": 975495,"Sequence number": 10552436, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1], [67108864, 16384, 4096, 1]], "Input Dims": [[4096], [8, 4096, 4, 4096]], "Ev Idx": 14539 + } + }, + { + "ph": "s", "id": 30, "pid": 2338710, "tid": 2338710, "ts": 6345936932576.700, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345936932656.811, "dur": 43.698, + "args": { + "External id": 975496,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[131072, 4096], [131072, 4096], [4096], [131072], [], [], [], [], [], [], [], [], []], "Ev Idx": 14540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338710, "tid": 2338710, + "ts": 6345936932848.181, "dur": 39.066, + "args": { + "External id": 975497,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False"], "Input type": ["ScalarList", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 14541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936932851.067, "dur": 5.605, + "args": { + "External id": 975498,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False", ""], "Input type": ["ScalarList", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2338710, + "ts": 6345936932860.252, "dur": 26.643, + "args": { + "External id": 975499,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 14543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345936932863.023, "dur": 23.376, + "args": { + "External id": 975500,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 14544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338710, "tid": 2338710, + "ts": 6345936932893.231, "dur": 23.789, + "args": { + "External id": 975501,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False"], "Input type": ["ScalarList", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 14545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936932894.129, "dur": 3.378, + "args": { + "External id": 975502,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False", ""], "Input type": ["ScalarList", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2338710, + "ts": 6345936932900.712, "dur": 15.944, + "args": { + "External id": 975503,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 14547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345936932901.647, "dur": 14.503, + "args": { + "External id": 975504,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 14548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338710, "tid": 2338710, + "ts": 6345936932920.583, "dur": 18.493, + "args": { + "External id": 975505,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False"], "Input type": ["ScalarList", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 14549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936932921.354, "dur": 2.443, + "args": { + "External id": 975506,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False", ""], "Input type": ["ScalarList", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2338710, + "ts": 6345936932924.396, "dur": 14.366, + "args": { + "External id": 975507,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 14551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345936932925.008, "dur": 13.253, + "args": { + "External id": 975508,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 14552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345936932951.664, "dur": 0.779, + "args": { + "External id": 975509,"Sequence number": 10552437, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], [], [], [], []], "Ev Idx": 14553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 2338710, "tid": 2338710, + "ts": 6345936932960.838, "dur": 11.479, + "args": { + "External id": 975510,"Sequence number": 10552437, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "5", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 8192], [], [], []], "Ev Idx": 14554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936932968.694, "dur": 2.014, + "args": { + "External id": 975511,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 5]", "[8192, 1, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 8192], [], [], []], "Ev Idx": 14555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345936932979.432, "dur": 7.376, + "args": { + "External id": 975512,"Sequence number": 10552437, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 5], [], [], [], []], "Ev Idx": 14556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936932983.991, "dur": 1.079, + "args": { + "External id": 975513,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 5]", "[8192, 1, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 5], [], [], []], "Ev Idx": 14557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345936932988.320, "dur": 6.044, + "args": { + "External id": 975514,"Sequence number": 10552437, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 5], [], [], [], []], "Ev Idx": 14558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936932990.161, "dur": 3.217, + "args": { + "External id": 975515,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 5]", "[8192, 1, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 5], [], [], []], "Ev Idx": 14559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345936932995.819, "dur": 3.018, + "args": { + "External id": 975516,"Sequence number": 10552437, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "1", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 5], [], [], [], []], "Ev Idx": 14560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936932997.613, "dur": 0.601, + "args": { + "External id": 975517,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 5], [], [], []], "Ev Idx": 14561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345936933002.421, "dur": 2.860, + "args": { + "External id": 975518,"Sequence number": 10552437, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 4], [], [], [], []], "Ev Idx": 14562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936933004.026, "dur": 0.562, + "args": { + "External id": 975519,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 4], [], [], []], "Ev Idx": 14563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345936933006.764, "dur": 26.771, + "args": { + "External id": 975520,"Sequence number": 10552437, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 4], [], [], [], []], "Ev Idx": 14564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936933031.194, "dur": 0.881, + "args": { + "External id": 975521,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 4], [], [], []], "Ev Idx": 14565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345936933035.441, "dur": 2.984, + "args": { + "External id": 975522,"Sequence number": 10552437, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4096, 4], [], [], [], []], "Ev Idx": 14566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936933037.296, "dur": 0.490, + "args": { + "External id": 975523,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4096, 4], [], [], []], "Ev Idx": 14567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345936933045.401, "dur": 5.710, + "args": { + "External id": 975524,"Sequence number": 10552437, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "2"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4096, 4], [], []], "Ev Idx": 14568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936933049.648, "dur": 0.516, + "args": { + "External id": 975525,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4096, 4], [], [], []], "Ev Idx": 14569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345936933098.767, "dur": 5.873, + "args": { + "External id": 975526,"Sequence number": 10552437, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4, 4096], [], [], [], []], "Ev Idx": 14570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936933102.530, "dur": 0.800, + "args": { + "External id": 975527,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 14571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338710, "tid": 2338710, + "ts": 6345936933110.778, "dur": 7.767, + "args": { + "External id": 975528,"Sequence number": 10552437, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4, 4096], [], []], "Ev Idx": 14572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936933116.299, "dur": 0.595, + "args": { + "External id": 975529,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 14573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345936933119.919, "dur": 5.375, + "args": { + "External id": 975530,"Sequence number": 10552437, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 14574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936933122.039, "dur": 2.575, + "args": { + "External id": 975531,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 14575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345936933129.518, "dur": 9.207, + "args": { + "External id": 975532,"Sequence number": 10552437, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 14576 + } + }, + { + "ph": "s", "id": 29, "pid": 2338710, "tid": 2338710, "ts": 6345936933129.518, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936933136.114, "dur": 0.697, + "args": { + "External id": 975533,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 14577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345936933140.137, "dur": 5.260, + "args": { + "External id": 975534,"Sequence number": 10552438, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 14578 + } + }, + { + "ph": "s", "id": 28, "pid": 2338710, "tid": 2338710, "ts": 6345936933140.137, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936933143.761, "dur": 0.720, + "args": { + "External id": 975535,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 14579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338710, "tid": 2338710, + "ts": 6345936933146.652, "dur": 8.024, + "args": { + "External id": 975536,"Sequence number": 10552439, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 14580 + } + }, + { + "ph": "s", "id": 27, "pid": 2338710, "tid": 2338710, "ts": 6345936933146.652, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936933153.015, "dur": 0.540, + "args": { + "External id": 975537,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 14581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345936933155.778, "dur": 4.449, + "args": { + "External id": 975538,"Sequence number": 10552440, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 14582 + } + }, + { + "ph": "s", "id": 26, "pid": 2338710, "tid": 2338710, "ts": 6345936933155.778, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936933158.629, "dur": 0.678, + "args": { + "External id": 975539,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 14583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338710, "tid": 2338710, + "ts": 6345936933164.485, "dur": 50.531, + "args": { + "External id": 975540,"Sequence number": 10552441, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 14584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338710, "tid": 2338710, + "ts": 6345936933166.710, "dur": 47.814, + "args": { + "External id": 975541,"Sequence number": 10552441, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 14585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936933169.906, "dur": 9.156, + "args": { + "External id": 975542,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 14586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936933171.907, "dur": 6.358, + "args": { + "External id": 975543,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936933181.326, "dur": 32.688, + "args": { + "External id": 975544,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 14588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936933246.703, "dur": 4.651, + "args": { + "External id": 975545,"Sequence number": 10552441, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14589 + } + }, + { + "ph": "s", "id": 25, "pid": 2338710, "tid": 2338710, "ts": 6345936933246.703, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345936933255.999, "dur": 1.272, + "args": { + "External id": 975546,"Sequence number": 10552442, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 14590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345936933287.010, "dur": 130640.914, + "args": { + "External id": 975547,"Sequence number": 10552442, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16384, 1], [1], [4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768], [32000, 4096], [], [], [], [], []], "Ev Idx": 14591 + } + }, + { + "ph": "s", "id": 24, "pid": 2338710, "tid": 2338710, "ts": 6345936933287.010, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338710, "tid": 2338710, + "ts": 6345936933303.533, "dur": 34.762, + "args": { + "External id": 975548,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338710, "tid": 2338710, + "ts": 6345936933304.350, "dur": 33.663, + "args": { + "External id": 975549,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936933306.310, "dur": 7.975, + "args": { + "External id": 975550,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[16384, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936933307.826, "dur": 6.042, + "args": { + "External id": 975551,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936933315.059, "dur": 22.346, + "args": { + "External id": 975552,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [16384, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 14596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936933357.669, "dur": 31.079, + "args": { + "External id": 975553,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936933359.128, "dur": 6.421, + "args": { + "External id": 975554,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936933361.426, "dur": 3.749, + "args": { + "External id": 975555,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2338710, + "ts": 6345936933366.686, "dur": 21.809, + "args": { + "External id": 975556,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 14600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345936933370.862, "dur": 17.092, + "args": { + "External id": 975557,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936933393.851, "dur": 23.424, + "args": { + "External id": 975558,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 14602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345936933394.808, "dur": 4.773, + "args": { + "External id": 975559,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 14603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936933396.271, "dur": 2.998, + "args": { + "External id": 975560,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2338710, + "ts": 6345936933400.365, "dur": 16.643, + "args": { + "External id": 975561,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345936933400.997, "dur": 15.487, + "args": { + "External id": 975562,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 14606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338710, "tid": 2338710, + "ts": 6345936933422.836, "dur": 22.290, + "args": { + "External id": 975563,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 14607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345936933424.359, "dur": 3.044, + "args": { + "External id": 975564,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2338710, + "ts": 6345936933428.410, "dur": 16.403, + "args": { + "External id": 975565,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[32768]], "Ev Idx": 14609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345936933431.759, "dur": 12.653, + "args": { + "External id": 975566,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 14610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338710, "tid": 2338710, + "ts": 6345936933452.908, "dur": 30.023, + "args": { + "External id": 975567,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 14611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345936933486.957, "dur": 64.968, + "args": { + "External id": 975568,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 14612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345936933490.694, "dur": 60.743, + "args": { + "External id": 975569,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936933496.755, "dur": 0.974, + "args": { + "External id": 975570,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 14614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345936933499.411, "dur": 30.444, + "args": { + "External id": 975571,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345936933503.327, "dur": 26.284, + "args": { + "External id": 975572,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[32768], [], [], [], [], [], []], "Ev Idx": 14616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345936933506.077, "dur": 2.947, + "args": { + "External id": 975573,"Record function id": 0, "Concrete Inputs": ["[32768]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345936933509.913, "dur": 19.173, + "args": { + "External id": 975574,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[32768], [32768], []], "Ev Idx": 14618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338710, "tid": 2338710, + "ts": 6345936933560.464, "dur": 123258.322, + "args": { + "External id": 975575,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 14619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338710, "tid": 2338710, + "ts": 6345936933562.587, "dur": 123253.968, + "args": { + "External id": 975576,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 14620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937056837.122, "dur": 13.696, + "args": { + "External id": 975577,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937056845.608, "dur": 1.413, + "args": { + "External id": 975578,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345937056858.843, "dur": 180.995, + "args": { + "External id": 975579,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 14623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937056861.344, "dur": 9.809, + "args": { + "External id": 975580,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937056864.646, "dur": 5.520, + "args": { + "External id": 975581,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 14625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937056867.504, "dur": 2.302, + "args": { + "External id": 975582,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 14626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937056872.906, "dur": 166.197, + "args": { + "External id": 975583,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937056889.957, "dur": 147.734, + "args": { + "External id": 975584,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937057050.572, "dur": 40.288, + "args": { + "External id": 975585,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937057083.540, "dur": 4.470, + "args": { + "External id": 975586,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937057105.869, "dur": 5.376, + "args": { + "External id": 975587,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 14631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937057127.580, "dur": 10.497, + "args": { + "External id": 975588,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 14632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937057131.406, "dur": 6.297, + "args": { + "External id": 975589,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937057346.389, "dur": 317.858, + "args": { + "External id": 975590,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937057352.687, "dur": 4.160, + "args": { + "External id": 975591,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937057359.605, "dur": 302.699, + "args": { + "External id": 975592,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 14636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345937057364.498, "dur": 1.148, + "args": { + "External id": 975593,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345937057369.647, "dur": 40.037, + "args": { + "External id": 975594,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345937057413.427, "dur": 7.000, + "args": { + "External id": 975595,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 14639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937057419.047, "dur": 0.767, + "args": { + "External id": 975596,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 14640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937057422.345, "dur": 40.236, + "args": { + "External id": 975597,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937057425.054, "dur": 1.518, + "args": { + "External id": 975598,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937057429.034, "dur": 33.176, + "args": { + "External id": 975599,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 14643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937057438.018, "dur": 5.116, + "args": { + "External id": 975600,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345937057466.685, "dur": 33.621, + "args": { + "External id": 975601,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345937057504.137, "dur": 26.206, + "args": { + "External id": 975602,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345937057536.047, "dur": 22.399, + "args": { + "External id": 975603,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 14647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345937057562.887, "dur": 20.827, + "args": { + "External id": 975604,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345937057587.458, "dur": 30.101, + "args": { + "External id": 975605,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 14649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937057590.082, "dur": 2.925, + "args": { + "External id": 975606,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937057595.600, "dur": 0.808, + "args": { + "External id": 975607,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 14651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345937057623.480, "dur": 20.139, + "args": { + "External id": 975608,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937057646.538, "dur": 14.361, + "args": { + "External id": 975609,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937057674.967, "dur": 2.081, + "args": { + "External id": 975610,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937057687.544, "dur": 5.753, + "args": { + "External id": 975611,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937057691.647, "dur": 0.441, + "args": { + "External id": 975612,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937057802.092, "dur": 96.431, + "args": { + "External id": 975613,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 14657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937057913.051, "dur": 8.191, + "args": { + "External id": 975614,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937057918.727, "dur": 0.889, + "args": { + "External id": 975615,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345937057924.335, "dur": 36.306, + "args": { + "External id": 975616,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 14660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937057967.047, "dur": 9.090, + "args": { + "External id": 975617,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 14661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937057969.231, "dur": 6.045, + "args": { + "External id": 975618,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 14662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937057972.340, "dur": 2.585, + "args": { + "External id": 975619,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 14663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937057980.847, "dur": 121.992, + "args": { + "External id": 975620,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937057982.464, "dur": 118.932, + "args": { + "External id": 975621,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937058114.898, "dur": 26.718, + "args": { + "External id": 975622,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 14666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937058152.103, "dur": 6.914, + "args": { + "External id": 975623,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937058156.654, "dur": 0.906, + "args": { + "External id": 975624,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345937058165.376, "dur": 74.705, + "args": { + "External id": 975625,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 14669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937058166.785, "dur": 11.372, + "args": { + "External id": 975626,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937058168.468, "dur": 8.901, + "args": { + "External id": 975627,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 14671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937058173.936, "dur": 3.160, + "args": { + "External id": 975628,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 14672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937058180.346, "dur": 59.327, + "args": { + "External id": 975629,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937058181.685, "dur": 57.161, + "args": { + "External id": 975630,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937058245.965, "dur": 4.525, + "args": { + "External id": 975631,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937058248.007, "dur": 0.613, + "args": { + "External id": 975632,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937058259.298, "dur": 2.288, + "args": { + "External id": 975633,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 14677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937058273.686, "dur": 11.853, + "args": { + "External id": 975634,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 14678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937058278.821, "dur": 6.389, + "args": { + "External id": 975635,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937058416.867, "dur": 242.109, + "args": { + "External id": 975636,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937058420.785, "dur": 2.998, + "args": { + "External id": 975637,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937058427.800, "dur": 230.560, + "args": { + "External id": 975638,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 14682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345937058429.668, "dur": 0.372, + "args": { + "External id": 975639,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345937058431.508, "dur": 28.700, + "args": { + "External id": 975640,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345937058462.415, "dur": 5.155, + "args": { + "External id": 975641,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 14685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937058464.605, "dur": 2.605, + "args": { + "External id": 975642,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 14686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937058471.638, "dur": 26.838, + "args": { + "External id": 975643,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937058472.824, "dur": 1.367, + "args": { + "External id": 975644,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937058475.768, "dur": 22.382, + "args": { + "External id": 975645,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 14689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937058479.097, "dur": 2.760, + "args": { + "External id": 975646,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345937058500.491, "dur": 30.821, + "args": { + "External id": 975647,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345937058533.303, "dur": 16.701, + "args": { + "External id": 975648,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345937058553.505, "dur": 18.542, + "args": { + "External id": 975649,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 14693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345937058573.947, "dur": 15.935, + "args": { + "External id": 975650,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345937058592.448, "dur": 30.188, + "args": { + "External id": 975651,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 14695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937058594.920, "dur": 2.689, + "args": { + "External id": 975652,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937058602.560, "dur": 2.130, + "args": { + "External id": 975653,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 14697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345937058624.921, "dur": 15.977, + "args": { + "External id": 975654,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937058642.684, "dur": 14.340, + "args": { + "External id": 975655,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937058667.148, "dur": 2.306, + "args": { + "External id": 975656,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937058681.183, "dur": 5.053, + "args": { + "External id": 975657,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937058684.651, "dur": 0.496, + "args": { + "External id": 975658,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937058769.213, "dur": 75.012, + "args": { + "External id": 975659,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 14703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937058851.068, "dur": 7.849, + "args": { + "External id": 975660,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937058856.626, "dur": 0.710, + "args": { + "External id": 975661,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345937058860.616, "dur": 31.055, + "args": { + "External id": 975662,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 14706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937058897.770, "dur": 6.593, + "args": { + "External id": 975663,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 14707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937058899.653, "dur": 3.813, + "args": { + "External id": 975664,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 14708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937058901.429, "dur": 1.848, + "args": { + "External id": 975665,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 14709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937058907.464, "dur": 61.121, + "args": { + "External id": 975666,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937058911.379, "dur": 56.485, + "args": { + "External id": 975667,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937058973.356, "dur": 55.251, + "args": { + "External id": 975668,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 14712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937059040.801, "dur": 6.331, + "args": { + "External id": 975669,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937059043.830, "dur": 1.908, + "args": { + "External id": 975670,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345937059052.399, "dur": 108.043, + "args": { + "External id": 975671,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 14715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937059053.514, "dur": 45.339, + "args": { + "External id": 975672,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937059054.915, "dur": 42.889, + "args": { + "External id": 975673,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 14717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937059094.823, "dur": 1.089, + "args": { + "External id": 975674,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 14718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937059099.923, "dur": 59.960, + "args": { + "External id": 975675,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937059100.856, "dur": 58.292, + "args": { + "External id": 975676,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937059167.453, "dur": 6.959, + "args": { + "External id": 975677,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937059169.796, "dur": 2.953, + "args": { + "External id": 975678,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937059182.999, "dur": 2.062, + "args": { + "External id": 975679,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 14723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937059197.666, "dur": 9.239, + "args": { + "External id": 975680,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 14724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937059200.363, "dur": 6.126, + "args": { + "External id": 975681,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937059322.216, "dur": 228.491, + "args": { + "External id": 975682,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937059328.012, "dur": 2.315, + "args": { + "External id": 975683,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937059331.853, "dur": 218.115, + "args": { + "External id": 975684,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 14728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345937059333.501, "dur": 0.623, + "args": { + "External id": 975685,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345937059335.772, "dur": 28.130, + "args": { + "External id": 975686,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345937059368.461, "dur": 4.918, + "args": { + "External id": 975687,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 14731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937059372.407, "dur": 0.685, + "args": { + "External id": 975688,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 14732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937059374.313, "dur": 25.643, + "args": { + "External id": 975689,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937059375.274, "dur": 1.393, + "args": { + "External id": 975690,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937059378.259, "dur": 21.288, + "args": { + "External id": 975691,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 14735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937059381.543, "dur": 3.398, + "args": { + "External id": 975692,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345937059401.787, "dur": 27.722, + "args": { + "External id": 975693,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345937059431.440, "dur": 18.427, + "args": { + "External id": 975694,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345937059453.343, "dur": 16.981, + "args": { + "External id": 975695,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 14739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345937059472.043, "dur": 15.804, + "args": { + "External id": 975696,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345937059492.179, "dur": 26.741, + "args": { + "External id": 975697,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 14741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937059496.333, "dur": 1.833, + "args": { + "External id": 975698,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937059500.599, "dur": 0.499, + "args": { + "External id": 975699,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 14743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345937059520.541, "dur": 14.826, + "args": { + "External id": 975700,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937059536.732, "dur": 12.173, + "args": { + "External id": 975701,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937059558.616, "dur": 2.003, + "args": { + "External id": 975702,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937059573.854, "dur": 5.268, + "args": { + "External id": 975703,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937059577.521, "dur": 0.637, + "args": { + "External id": 975704,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937059658.655, "dur": 63.287, + "args": { + "External id": 975705,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 14749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937059728.052, "dur": 5.149, + "args": { + "External id": 975706,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937059731.221, "dur": 0.738, + "args": { + "External id": 975707,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345937059734.988, "dur": 29.586, + "args": { + "External id": 975708,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 14752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937059769.736, "dur": 8.632, + "args": { + "External id": 975709,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 14753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937059771.535, "dur": 6.108, + "args": { + "External id": 975710,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 14754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937059775.861, "dur": 1.586, + "args": { + "External id": 975711,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 14755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937059781.355, "dur": 48.390, + "args": { + "External id": 975712,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937059782.273, "dur": 46.850, + "args": { + "External id": 975713,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937059834.248, "dur": 17.560, + "args": { + "External id": 975714,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 14758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937059858.876, "dur": 3.899, + "args": { + "External id": 975715,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937059861.048, "dur": 0.721, + "args": { + "External id": 975716,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345937059869.930, "dur": 50.381, + "args": { + "External id": 975717,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 14761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937059870.861, "dur": 4.214, + "args": { + "External id": 975718,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937059871.837, "dur": 2.571, + "args": { + "External id": 975719,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 14763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937059873.285, "dur": 0.963, + "args": { + "External id": 975720,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 14764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937059875.893, "dur": 43.959, + "args": { + "External id": 975721,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937059876.772, "dur": 42.455, + "args": { + "External id": 975722,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937059924.754, "dur": 4.014, + "args": { + "External id": 975723,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937059926.850, "dur": 0.605, + "args": { + "External id": 975724,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937059937.813, "dur": 1.483, + "args": { + "External id": 975725,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 14769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937059948.156, "dur": 8.566, + "args": { + "External id": 975726,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 14770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937059950.222, "dur": 6.180, + "args": { + "External id": 975727,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937060118.863, "dur": 222.325, + "args": { + "External id": 975728,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937060123.322, "dur": 3.926, + "args": { + "External id": 975729,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937060131.384, "dur": 209.168, + "args": { + "External id": 975730,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 14774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345937060133.033, "dur": 0.406, + "args": { + "External id": 975731,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345937060134.756, "dur": 28.160, + "args": { + "External id": 975732,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345937060165.238, "dur": 4.755, + "args": { + "External id": 975733,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 14777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937060168.794, "dur": 0.828, + "args": { + "External id": 975734,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 14778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937060171.115, "dur": 25.965, + "args": { + "External id": 975735,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937060172.404, "dur": 1.772, + "args": { + "External id": 975736,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937060175.562, "dur": 21.150, + "args": { + "External id": 975737,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 14781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937060178.996, "dur": 2.846, + "args": { + "External id": 975738,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345937060198.826, "dur": 24.761, + "args": { + "External id": 975739,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345937060225.609, "dur": 17.916, + "args": { + "External id": 975740,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345937060249.065, "dur": 16.718, + "args": { + "External id": 975741,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 14785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345937060267.639, "dur": 14.789, + "args": { + "External id": 975742,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345937060284.514, "dur": 23.759, + "args": { + "External id": 975743,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 14787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937060286.990, "dur": 2.010, + "args": { + "External id": 975744,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937060291.407, "dur": 0.755, + "args": { + "External id": 975745,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 14789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345937060309.613, "dur": 14.503, + "args": { + "External id": 975746,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937060328.027, "dur": 11.443, + "args": { + "External id": 975747,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937060350.319, "dur": 2.501, + "args": { + "External id": 975748,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937060364.288, "dur": 4.365, + "args": { + "External id": 975749,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937060367.006, "dur": 0.616, + "args": { + "External id": 975750,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937060448.397, "dur": 69.901, + "args": { + "External id": 975751,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 14795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937060523.947, "dur": 5.049, + "args": { + "External id": 975752,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937060526.978, "dur": 0.897, + "args": { + "External id": 975753,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345937060530.692, "dur": 29.446, + "args": { + "External id": 975754,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 14798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937060565.770, "dur": 8.145, + "args": { + "External id": 975755,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 14799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937060569.602, "dur": 3.561, + "args": { + "External id": 975756,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 14800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937060571.610, "dur": 1.372, + "args": { + "External id": 975757,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 14801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937060576.867, "dur": 50.812, + "args": { + "External id": 975758,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937060578.226, "dur": 48.771, + "args": { + "External id": 975759,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937060632.360, "dur": 18.012, + "args": { + "External id": 975760,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 14804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937060657.747, "dur": 6.093, + "args": { + "External id": 975761,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937060662.270, "dur": 0.523, + "args": { + "External id": 975762,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345937060668.654, "dur": 55.037, + "args": { + "External id": 975763,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 14807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937060669.723, "dur": 5.733, + "args": { + "External id": 975764,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937060670.644, "dur": 4.133, + "args": { + "External id": 975765,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 14809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937060671.959, "dur": 2.613, + "args": { + "External id": 975766,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 14810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937060676.477, "dur": 46.698, + "args": { + "External id": 975767,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937060677.191, "dur": 45.086, + "args": { + "External id": 975768,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937060731.601, "dur": 3.976, + "args": { + "External id": 975769,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937060733.731, "dur": 0.444, + "args": { + "External id": 975770,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937060741.173, "dur": 1.551, + "args": { + "External id": 975771,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 14815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937060751.532, "dur": 8.173, + "args": { + "External id": 975772,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 14816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937060753.924, "dur": 5.438, + "args": { + "External id": 975773,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937060857.536, "dur": 274.025, + "args": { + "External id": 975774,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937060859.908, "dur": 1.998, + "args": { + "External id": 975775,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937060863.811, "dur": 266.971, + "args": { + "External id": 975776,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 14820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345937060865.765, "dur": 0.432, + "args": { + "External id": 975777,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345937060867.521, "dur": 25.894, + "args": { + "External id": 975778,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345937060895.254, "dur": 3.110, + "args": { + "External id": 975779,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 14823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937060897.357, "dur": 0.807, + "args": { + "External id": 975780,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 14824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937060899.488, "dur": 29.263, + "args": { + "External id": 975781,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937060900.588, "dur": 1.721, + "args": { + "External id": 975782,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937060903.705, "dur": 24.630, + "args": { + "External id": 975783,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 14827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937060910.678, "dur": 3.098, + "args": { + "External id": 975784,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345937060930.399, "dur": 23.804, + "args": { + "External id": 975785,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345937060955.650, "dur": 15.444, + "args": { + "External id": 975786,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345937060974.872, "dur": 15.202, + "args": { + "External id": 975787,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 14831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345937060991.608, "dur": 14.713, + "args": { + "External id": 975788,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345937061027.252, "dur": 64.505, + "args": { + "External id": 975789,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 14833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937061029.946, "dur": 3.681, + "args": { + "External id": 975790,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937061036.176, "dur": 0.876, + "args": { + "External id": 975791,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 14835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345937061097.726, "dur": 17.382, + "args": { + "External id": 975792,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937061116.345, "dur": 13.002, + "args": { + "External id": 975793,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937061141.349, "dur": 2.928, + "args": { + "External id": 975794,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937061155.552, "dur": 4.467, + "args": { + "External id": 975795,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937061158.382, "dur": 0.571, + "args": { + "External id": 975796,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937061244.277, "dur": 70.426, + "args": { + "External id": 975797,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 14841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937061320.812, "dur": 8.475, + "args": { + "External id": 975798,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937061326.777, "dur": 1.165, + "args": { + "External id": 975799,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345937061330.827, "dur": 28.202, + "args": { + "External id": 975800,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 14844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937061364.593, "dur": 6.414, + "args": { + "External id": 975801,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 14845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937061366.639, "dur": 3.478, + "args": { + "External id": 975802,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 14846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937061368.582, "dur": 1.298, + "args": { + "External id": 975803,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 14847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937061373.893, "dur": 50.764, + "args": { + "External id": 975804,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937061375.002, "dur": 48.767, + "args": { + "External id": 975805,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937061431.308, "dur": 18.248, + "args": { + "External id": 975806,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 14850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937061456.812, "dur": 3.925, + "args": { + "External id": 975807,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937061458.972, "dur": 0.819, + "args": { + "External id": 975808,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345937061465.603, "dur": 55.801, + "args": { + "External id": 975809,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 14853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937061466.638, "dur": 8.768, + "args": { + "External id": 975810,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937061467.898, "dur": 6.789, + "args": { + "External id": 975811,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 14855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937061471.733, "dur": 2.707, + "args": { + "External id": 975812,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 14856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937061476.380, "dur": 44.660, + "args": { + "External id": 975813,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937061476.941, "dur": 43.436, + "args": { + "External id": 975814,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937061526.168, "dur": 6.629, + "args": { + "External id": 975815,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937061528.249, "dur": 2.934, + "args": { + "External id": 975816,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937061539.359, "dur": 1.678, + "args": { + "External id": 975817,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 14861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937061550.102, "dur": 9.480, + "args": { + "External id": 975818,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 14862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937061555.029, "dur": 4.265, + "args": { + "External id": 975819,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937061664.115, "dur": 230.492, + "args": { + "External id": 975820,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937061666.850, "dur": 2.214, + "args": { + "External id": 975821,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937061670.863, "dur": 223.155, + "args": { + "External id": 975822,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 14866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345937061672.510, "dur": 0.471, + "args": { + "External id": 975823,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345937061675.892, "dur": 25.128, + "args": { + "External id": 975824,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345937061703.018, "dur": 25.425, + "args": { + "External id": 975825,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 14869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937061727.168, "dur": 1.039, + "args": { + "External id": 975826,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 14870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937061732.297, "dur": 26.904, + "args": { + "External id": 975827,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937061733.362, "dur": 3.199, + "args": { + "External id": 975828,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937061737.927, "dur": 20.754, + "args": { + "External id": 975829,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 14873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937061741.185, "dur": 2.732, + "args": { + "External id": 975830,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345937061761.123, "dur": 26.569, + "args": { + "External id": 975831,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345937061789.347, "dur": 15.254, + "args": { + "External id": 975832,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345937061808.124, "dur": 14.790, + "args": { + "External id": 975833,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 14877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345937061824.594, "dur": 13.287, + "args": { + "External id": 975834,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345937061839.941, "dur": 26.087, + "args": { + "External id": 975835,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 14879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937061842.047, "dur": 1.913, + "args": { + "External id": 975836,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937061851.137, "dur": 0.714, + "args": { + "External id": 975837,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 14881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345937061867.387, "dur": 12.766, + "args": { + "External id": 975838,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937061881.294, "dur": 11.159, + "args": { + "External id": 975839,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937061902.545, "dur": 1.972, + "args": { + "External id": 975840,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937061915.097, "dur": 4.476, + "args": { + "External id": 975841,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937061917.844, "dur": 0.770, + "args": { + "External id": 975842,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937061994.330, "dur": 127.027, + "args": { + "External id": 975843,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 14887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937062130.885, "dur": 11.385, + "args": { + "External id": 975844,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937062137.370, "dur": 2.859, + "args": { + "External id": 975845,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345937062144.193, "dur": 36.811, + "args": { + "External id": 975846,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 14890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937062187.419, "dur": 6.363, + "args": { + "External id": 975847,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 14891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937062189.175, "dur": 3.682, + "args": { + "External id": 975848,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 14892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937062191.335, "dur": 1.267, + "args": { + "External id": 975849,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 14893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937062197.326, "dur": 57.267, + "args": { + "External id": 975850,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937062201.102, "dur": 52.915, + "args": { + "External id": 975851,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937062259.127, "dur": 19.275, + "args": { + "External id": 975852,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 14896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937062285.932, "dur": 4.308, + "args": { + "External id": 975853,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937062288.449, "dur": 0.801, + "args": { + "External id": 975854,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345937062295.162, "dur": 53.760, + "args": { + "External id": 975855,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 14899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937062296.243, "dur": 6.057, + "args": { + "External id": 975856,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937062297.426, "dur": 4.130, + "args": { + "External id": 975857,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 14901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937062300.785, "dur": 0.595, + "args": { + "External id": 975858,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 14902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937062303.060, "dur": 45.422, + "args": { + "External id": 975859,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937062303.508, "dur": 44.242, + "args": { + "External id": 975860,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937062354.193, "dur": 6.557, + "args": { + "External id": 975861,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937062356.253, "dur": 3.338, + "args": { + "External id": 975862,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937062368.569, "dur": 1.953, + "args": { + "External id": 975863,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 14907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937062382.491, "dur": 7.582, + "args": { + "External id": 975864,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 14908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937062384.709, "dur": 4.994, + "args": { + "External id": 975865,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937062495.929, "dur": 245.782, + "args": { + "External id": 975866,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937062498.892, "dur": 4.437, + "args": { + "External id": 975867,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937062505.015, "dur": 236.129, + "args": { + "External id": 975868,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 14912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345937062506.773, "dur": 0.500, + "args": { + "External id": 975869,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345937062508.596, "dur": 26.214, + "args": { + "External id": 975870,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345937062539.293, "dur": 5.010, + "args": { + "External id": 975871,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 14915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937062543.410, "dur": 0.667, + "args": { + "External id": 975872,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 14916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937062567.441, "dur": 26.452, + "args": { + "External id": 975873,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937062568.444, "dur": 1.806, + "args": { + "External id": 975874,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937062571.656, "dur": 21.924, + "args": { + "External id": 975875,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 14919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937062574.623, "dur": 3.478, + "args": { + "External id": 975876,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345937062595.483, "dur": 25.114, + "args": { + "External id": 975877,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345937062622.146, "dur": 16.336, + "args": { + "External id": 975878,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345937062641.369, "dur": 16.442, + "args": { + "External id": 975879,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 14923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345937062659.500, "dur": 20.583, + "args": { + "External id": 975880,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345937062684.753, "dur": 24.894, + "args": { + "External id": 975881,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 14925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937062688.690, "dur": 1.796, + "args": { + "External id": 975882,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937062692.528, "dur": 0.758, + "args": { + "External id": 975883,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 14927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345937062711.557, "dur": 15.155, + "args": { + "External id": 975884,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937062727.986, "dur": 11.973, + "args": { + "External id": 975885,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937062750.592, "dur": 2.186, + "args": { + "External id": 975886,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937062764.965, "dur": 4.072, + "args": { + "External id": 975887,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937062767.624, "dur": 0.415, + "args": { + "External id": 975888,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937062845.646, "dur": 60.938, + "args": { + "External id": 975889,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 14933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937062912.786, "dur": 4.992, + "args": { + "External id": 975890,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937062915.796, "dur": 0.641, + "args": { + "External id": 975891,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345937062919.275, "dur": 24.254, + "args": { + "External id": 975892,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 14936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937062947.823, "dur": 7.588, + "args": { + "External id": 975893,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 14937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937062949.428, "dur": 5.147, + "args": { + "External id": 975894,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 14938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937062953.482, "dur": 0.936, + "args": { + "External id": 975895,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 14939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937062957.950, "dur": 47.045, + "args": { + "External id": 975896,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937062959.141, "dur": 44.998, + "args": { + "External id": 975897,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937063029.413, "dur": 23.359, + "args": { + "External id": 975898,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 14942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937063100.329, "dur": 7.345, + "args": { + "External id": 975899,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937063104.978, "dur": 1.111, + "args": { + "External id": 975900,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345937063116.171, "dur": 59.635, + "args": { + "External id": 975901,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 14945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937063117.289, "dur": 4.609, + "args": { + "External id": 975902,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937063118.838, "dur": 2.345, + "args": { + "External id": 975903,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 14947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937063120.396, "dur": 0.605, + "args": { + "External id": 975904,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 14948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937063122.621, "dur": 52.622, + "args": { + "External id": 975905,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937063123.305, "dur": 51.229, + "args": { + "External id": 975906,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937063181.244, "dur": 4.188, + "args": { + "External id": 975907,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937063183.383, "dur": 0.739, + "args": { + "External id": 975908,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937063194.901, "dur": 1.907, + "args": { + "External id": 975909,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 14953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937063206.158, "dur": 9.168, + "args": { + "External id": 975910,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 14954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937063208.261, "dur": 6.638, + "args": { + "External id": 975911,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937063319.329, "dur": 202.930, + "args": { + "External id": 975912,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937063323.564, "dur": 3.369, + "args": { + "External id": 975913,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937063331.381, "dur": 190.257, + "args": { + "External id": 975914,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 14958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345937063332.932, "dur": 0.419, + "args": { + "External id": 975915,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345937063334.792, "dur": 25.528, + "args": { + "External id": 975916,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345937063362.087, "dur": 5.074, + "args": { + "External id": 975917,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 14961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937063366.066, "dur": 0.770, + "args": { + "External id": 975918,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 14962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937063368.123, "dur": 22.730, + "args": { + "External id": 975919,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937063369.296, "dur": 1.677, + "args": { + "External id": 975920,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937063372.262, "dur": 18.187, + "args": { + "External id": 975921,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 14965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937063375.506, "dur": 2.774, + "args": { + "External id": 975922,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345937063392.521, "dur": 23.669, + "args": { + "External id": 975923,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345937063418.416, "dur": 14.056, + "args": { + "External id": 975924,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345937063438.112, "dur": 14.681, + "args": { + "External id": 975925,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 14969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345937063454.447, "dur": 13.317, + "args": { + "External id": 975926,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345937063469.933, "dur": 21.386, + "args": { + "External id": 975927,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 14971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937063472.713, "dur": 1.660, + "args": { + "External id": 975928,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937063476.034, "dur": 0.491, + "args": { + "External id": 975929,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 14973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345937063492.908, "dur": 12.580, + "args": { + "External id": 975930,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937063509.340, "dur": 10.959, + "args": { + "External id": 975931,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937063529.709, "dur": 1.970, + "args": { + "External id": 975932,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937063541.643, "dur": 3.764, + "args": { + "External id": 975933,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937063543.818, "dur": 0.694, + "args": { + "External id": 975934,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937063616.518, "dur": 58.807, + "args": { + "External id": 975935,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 14979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937063680.763, "dur": 4.814, + "args": { + "External id": 975936,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937063683.600, "dur": 0.624, + "args": { + "External id": 975937,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345937063687.107, "dur": 26.746, + "args": { + "External id": 975938,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 14982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937063718.565, "dur": 7.759, + "args": { + "External id": 975939,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 14983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937063722.603, "dur": 2.981, + "args": { + "External id": 975940,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 14984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937063724.314, "dur": 1.099, + "args": { + "External id": 975941,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 14985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937063729.118, "dur": 46.305, + "args": { + "External id": 975942,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937063730.160, "dur": 44.532, + "args": { + "External id": 975943,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937063779.617, "dur": 16.312, + "args": { + "External id": 975944,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 14988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345937063801.585, "dur": 29.362, + "args": { + "External id": 975945,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 14989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345937063804.002, "dur": 26.381, + "args": { + "External id": 975946,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937063810.940, "dur": 2.858, + "args": { + "External id": 975947,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 14991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345937063840.198, "dur": 30.988, + "args": { + "External id": 975948,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 14992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345937063842.377, "dur": 28.553, + "args": { + "External id": 975949,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], [], []], "Ev Idx": 14993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937063847.974, "dur": 4.986, + "args": { + "External id": 975950,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345937063854.275, "dur": 16.085, + "args": { + "External id": 975951,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 14995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2338710, + "ts": 6345937063886.578, "dur": 7.268, + "args": { + "External id": 975952,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 14996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2338710, + "ts": 6345937063889.951, "dur": 3.567, + "args": { + "External id": 975953,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 14997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2338710, + "ts": 6345937063895.135, "dur": 4.452, + "args": { + "External id": 975954,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2338710, + "ts": 6345937063898.598, "dur": 0.906, + "args": { + "External id": 975955,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937063950.217, "dur": 32.273, + "args": { + "External id": 975956,"Sequence number": 10552443, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 15000 + } + }, + { + "ph": "s", "id": 23, "pid": 2338710, "tid": 2338710, "ts": 6345937063950.217, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937063990.707, "dur": 6.677, + "args": { + "External id": 975957,"Sequence number": 10552444, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4, 4096], [], [], [], []], "Ev Idx": 15001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937063994.432, "dur": 1.224, + "args": { + "External id": 975958,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 15002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338710, "tid": 2338710, + "ts": 6345937064000.109, "dur": 6.712, + "args": { + "External id": 975959,"Sequence number": 10552444, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "1"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4, 4096], [], []], "Ev Idx": 15003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937064005.078, "dur": 0.436, + "args": { + "External id": 975960,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "2"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 15004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937064031.520, "dur": 5.451, + "args": { + "External id": 975961,"Sequence number": 10552444, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 15005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937064034.997, "dur": 0.706, + "args": { + "External id": 975962,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "2"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 15006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937064043.251, "dur": 6.947, + "args": { + "External id": 975963,"Sequence number": 10552444, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 15007 + } + }, + { + "ph": "s", "id": 22, "pid": 2338710, "tid": 2338710, "ts": 6345937064043.251, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937064047.065, "dur": 0.833, + "args": { + "External id": 975964,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 15008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937064051.328, "dur": 50.053, + "args": { + "External id": 975965,"Sequence number": 10552445, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 15009 + } + }, + { + "ph": "s", "id": 21, "pid": 2338710, "tid": 2338710, "ts": 6345937064051.328, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937064097.043, "dur": 0.944, + "args": { + "External id": 975966,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 15010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338710, "tid": 2338710, + "ts": 6345937064103.261, "dur": 6.339, + "args": { + "External id": 975967,"Sequence number": 10552446, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 15011 + } + }, + { + "ph": "s", "id": 20, "pid": 2338710, "tid": 2338710, "ts": 6345937064103.261, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937064107.638, "dur": 0.707, + "args": { + "External id": 975968,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "4096"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 15012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937064111.343, "dur": 8.338, + "args": { + "External id": 975969,"Sequence number": 10552447, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 15013 + } + }, + { + "ph": "s", "id": 19, "pid": 2338710, "tid": 2338710, "ts": 6345937064111.343, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937064114.764, "dur": 3.859, + "args": { + "External id": 975970,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "4096"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 15014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338710, "tid": 2338710, + "ts": 6345937064125.082, "dur": 41.071, + "args": { + "External id": 975971,"Sequence number": 10552448, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 15015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338710, "tid": 2338710, + "ts": 6345937064126.809, "dur": 39.105, + "args": { + "External id": 975972,"Sequence number": 10552448, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 15016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345937064130.013, "dur": 11.464, + "args": { + "External id": 975973,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 15017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937064134.976, "dur": 5.800, + "args": { + "External id": 975974,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345937064142.868, "dur": 22.615, + "args": { + "External id": 975975,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 15019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937064198.008, "dur": 4.213, + "args": { + "External id": 975976,"Sequence number": 10552448, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 15020 + } + }, + { + "ph": "s", "id": 18, "pid": 2338710, "tid": 2338710, "ts": 6345937064198.008, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937064205.096, "dur": 1.288, + "args": { + "External id": 975977,"Sequence number": 10552449, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 15021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345937064244.142, "dur": 45033.013, + "args": { + "External id": 975978,"Sequence number": 10552449, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16384, 1], [1], [4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768], [32000, 4096], [], [], [], [], []], "Ev Idx": 15022 + } + }, + { + "ph": "s", "id": 17, "pid": 2338710, "tid": 2338710, "ts": 6345937064244.142, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338710, "tid": 2338710, + "ts": 6345937064264.216, "dur": 33.275, + "args": { + "External id": 975979,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 15023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338710, "tid": 2338710, + "ts": 6345937064267.763, "dur": 29.476, + "args": { + "External id": 975980,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 15024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345937064269.216, "dur": 7.766, + "args": { + "External id": 975981,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[16384, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 15025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937064270.898, "dur": 5.545, + "args": { + "External id": 975982,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345937064278.016, "dur": 18.640, + "args": { + "External id": 975983,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [16384, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 15027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338710, "tid": 2338710, + "ts": 6345937064317.288, "dur": 28.442, + "args": { + "External id": 975984,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 15028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345937064318.805, "dur": 6.563, + "args": { + "External id": 975985,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 15029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937064321.042, "dur": 3.941, + "args": { + "External id": 975986,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2338710, + "ts": 6345937064327.103, "dur": 18.282, + "args": { + "External id": 975987,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 15031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345937064329.458, "dur": 15.305, + "args": { + "External id": 975988,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 15032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338710, "tid": 2338710, + "ts": 6345937064349.754, "dur": 23.082, + "args": { + "External id": 975989,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 15033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345937064350.818, "dur": 4.616, + "args": { + "External id": 975990,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 15034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937064352.360, "dur": 2.808, + "args": { + "External id": 975991,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2338710, + "ts": 6345937064358.667, "dur": 13.925, + "args": { + "External id": 975992,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345937064359.364, "dur": 12.792, + "args": { + "External id": 975993,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 15037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338710, "tid": 2338710, + "ts": 6345937064379.974, "dur": 23.338, + "args": { + "External id": 975994,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 15038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937064382.070, "dur": 4.036, + "args": { + "External id": 975995,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2338710, + "ts": 6345937064387.227, "dur": 15.779, + "args": { + "External id": 975996,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[32768]], "Ev Idx": 15040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345937064388.174, "dur": 14.430, + "args": { + "External id": 975997,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338710, "tid": 2338710, + "ts": 6345937064409.238, "dur": 26.664, + "args": { + "External id": 975998,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345937064439.459, "dur": 55.805, + "args": { + "External id": 975999,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345937064443.967, "dur": 50.807, + "args": { + "External id": 976000,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937064450.929, "dur": 0.871, + "args": { + "External id": 976001,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 15045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345937064453.234, "dur": 24.388, + "args": { + "External id": 976002,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345937064455.067, "dur": 22.241, + "args": { + "External id": 976003,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[32768], [], [], [], [], [], []], "Ev Idx": 15047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937064458.094, "dur": 3.138, + "args": { + "External id": 976004,"Record function id": 0, "Concrete Inputs": ["[32768]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345937064462.282, "dur": 14.439, + "args": { + "External id": 976005,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[32768], [32768], []], "Ev Idx": 15049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338710, "tid": 2338710, + "ts": 6345937064500.106, "dur": 37768.809, + "args": { + "External id": 976006,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 15050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338710, "tid": 2338710, + "ts": 6345937064501.709, "dur": 37765.354, + "args": { + "External id": 976007,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 15051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937102284.234, "dur": 7.968, + "args": { + "External id": 976008,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937102289.291, "dur": 1.123, + "args": { + "External id": 976009,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345937102301.131, "dur": 129.334, + "args": { + "External id": 976010,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937102303.066, "dur": 7.146, + "args": { + "External id": 976011,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937102305.520, "dur": 3.706, + "args": { + "External id": 976012,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937102308.069, "dur": 0.820, + "args": { + "External id": 976013,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937102312.017, "dur": 117.715, + "args": { + "External id": 976014,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937102314.155, "dur": 114.158, + "args": { + "External id": 976015,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937102436.343, "dur": 5.949, + "args": { + "External id": 976016,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937102440.065, "dur": 0.802, + "args": { + "External id": 976017,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937102454.983, "dur": 3.768, + "args": { + "External id": 976018,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937102471.301, "dur": 9.271, + "args": { + "External id": 976019,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937102475.153, "dur": 5.045, + "args": { + "External id": 976020,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937102664.986, "dur": 275.329, + "args": { + "External id": 976021,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937102670.757, "dur": 5.329, + "args": { + "External id": 976022,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937102681.474, "dur": 257.098, + "args": { + "External id": 976023,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345937102683.657, "dur": 0.823, + "args": { + "External id": 976024,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345937102687.003, "dur": 30.151, + "args": { + "External id": 976025,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345937102720.745, "dur": 6.080, + "args": { + "External id": 976026,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937102725.745, "dur": 0.690, + "args": { + "External id": 976027,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937102728.499, "dur": 32.031, + "args": { + "External id": 976028,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937102729.651, "dur": 2.654, + "args": { + "External id": 976029,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937102733.985, "dur": 26.174, + "args": { + "External id": 976030,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937102739.622, "dur": 4.883, + "args": { + "External id": 976031,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345937102766.726, "dur": 28.660, + "args": { + "External id": 976032,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345937102798.874, "dur": 20.005, + "args": { + "External id": 976033,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345937102824.266, "dur": 20.601, + "args": { + "External id": 976034,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345937102846.588, "dur": 17.976, + "args": { + "External id": 976035,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345937102867.905, "dur": 28.184, + "args": { + "External id": 976036,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937102870.599, "dur": 2.146, + "args": { + "External id": 976037,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937102876.465, "dur": 0.546, + "args": { + "External id": 976038,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345937102902.071, "dur": 17.831, + "args": { + "External id": 976039,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937102923.345, "dur": 14.070, + "args": { + "External id": 976040,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937102949.949, "dur": 2.241, + "args": { + "External id": 976041,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937102960.935, "dur": 5.775, + "args": { + "External id": 976042,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937102965.105, "dur": 0.596, + "args": { + "External id": 976043,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937103131.983, "dur": 97.776, + "args": { + "External id": 976044,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937103238.816, "dur": 11.230, + "args": { + "External id": 976045,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937103244.257, "dur": 1.224, + "args": { + "External id": 976046,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345937103255.605, "dur": 35.284, + "args": { + "External id": 976047,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937103299.229, "dur": 12.319, + "args": { + "External id": 976048,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937103302.514, "dur": 8.066, + "args": { + "External id": 976049,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937103305.061, "dur": 5.236, + "args": { + "External id": 976050,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937103316.823, "dur": 62.368, + "args": { + "External id": 976051,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937103318.386, "dur": 59.959, + "args": { + "External id": 976052,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937103385.865, "dur": 21.686, + "args": { + "External id": 976053,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937103418.996, "dur": 5.137, + "args": { + "External id": 976054,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937103422.431, "dur": 0.561, + "args": { + "External id": 976055,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345937103431.041, "dur": 61.854, + "args": { + "External id": 976056,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937103432.322, "dur": 5.755, + "args": { + "External id": 976057,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937103434.625, "dur": 2.693, + "args": { + "External id": 976058,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937103436.535, "dur": 0.587, + "args": { + "External id": 976059,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937103442.266, "dur": 50.113, + "args": { + "External id": 976060,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937103443.283, "dur": 48.611, + "args": { + "External id": 976061,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937103498.985, "dur": 27.097, + "args": { + "External id": 976062,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937103521.730, "dur": 2.966, + "args": { + "External id": 976063,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937103533.799, "dur": 2.271, + "args": { + "External id": 976064,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937103546.429, "dur": 11.686, + "args": { + "External id": 976065,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937103551.829, "dur": 5.981, + "args": { + "External id": 976066,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937103682.625, "dur": 233.568, + "args": { + "External id": 976067,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937103686.457, "dur": 2.354, + "args": { + "External id": 976068,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937103690.835, "dur": 224.779, + "args": { + "External id": 976069,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345937103692.539, "dur": 0.363, + "args": { + "External id": 976070,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345937103694.189, "dur": 31.796, + "args": { + "External id": 976071,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345937103727.809, "dur": 3.500, + "args": { + "External id": 976072,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937103730.285, "dur": 0.750, + "args": { + "External id": 976073,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937103732.612, "dur": 30.419, + "args": { + "External id": 976074,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937103736.555, "dur": 1.655, + "args": { + "External id": 976075,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937103739.519, "dur": 23.177, + "args": { + "External id": 976076,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937103743.999, "dur": 3.330, + "args": { + "External id": 976077,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345937103764.800, "dur": 27.389, + "args": { + "External id": 976078,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345937103794.054, "dur": 16.542, + "args": { + "External id": 976079,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345937103813.773, "dur": 18.669, + "args": { + "External id": 976080,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345937103834.170, "dur": 15.713, + "args": { + "External id": 976081,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345937103852.116, "dur": 29.518, + "args": { + "External id": 976082,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937103854.316, "dur": 2.450, + "args": { + "External id": 976083,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937103861.814, "dur": 0.675, + "args": { + "External id": 976084,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345937103883.585, "dur": 15.082, + "args": { + "External id": 976085,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937103900.041, "dur": 14.002, + "args": { + "External id": 976086,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937103923.894, "dur": 2.117, + "args": { + "External id": 976087,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937103936.549, "dur": 5.274, + "args": { + "External id": 976088,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937103940.249, "dur": 0.526, + "args": { + "External id": 976089,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937104046.470, "dur": 119.252, + "args": { + "External id": 976090,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937104178.580, "dur": 13.410, + "args": { + "External id": 976091,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937104186.172, "dur": 3.773, + "args": { + "External id": 976092,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345937104194.041, "dur": 37.338, + "args": { + "External id": 976093,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937104237.554, "dur": 7.136, + "args": { + "External id": 976094,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937104239.265, "dur": 4.643, + "args": { + "External id": 976095,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937104241.477, "dur": 2.031, + "args": { + "External id": 976096,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937104248.679, "dur": 53.923, + "args": { + "External id": 976097,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937104250.234, "dur": 51.723, + "args": { + "External id": 976098,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937104310.110, "dur": 24.814, + "args": { + "External id": 976099,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937104342.805, "dur": 5.183, + "args": { + "External id": 976100,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937104345.425, "dur": 1.313, + "args": { + "External id": 976101,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345937104353.135, "dur": 82.829, + "args": { + "External id": 976102,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937104354.494, "dur": 6.588, + "args": { + "External id": 976103,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937104355.490, "dur": 4.946, + "args": { + "External id": 976104,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937104359.575, "dur": 0.704, + "args": { + "External id": 976105,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937104362.040, "dur": 73.502, + "args": { + "External id": 976106,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937104363.019, "dur": 71.868, + "args": { + "External id": 976107,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937104442.719, "dur": 7.628, + "args": { + "External id": 976108,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937104445.061, "dur": 3.994, + "args": { + "External id": 976109,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937104457.784, "dur": 1.985, + "args": { + "External id": 976110,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937104476.131, "dur": 13.820, + "args": { + "External id": 976111,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937104481.837, "dur": 7.714, + "args": { + "External id": 976112,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937104608.025, "dur": 205.240, + "args": { + "External id": 976113,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937104610.557, "dur": 3.323, + "args": { + "External id": 976114,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937104615.472, "dur": 197.085, + "args": { + "External id": 976115,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345937104617.338, "dur": 0.412, + "args": { + "External id": 976116,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345937104619.303, "dur": 26.516, + "args": { + "External id": 976117,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345937104647.792, "dur": 3.602, + "args": { + "External id": 976118,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937104650.390, "dur": 0.752, + "args": { + "External id": 976119,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937104655.065, "dur": 25.693, + "args": { + "External id": 976120,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937104656.205, "dur": 1.508, + "args": { + "External id": 976121,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937104659.025, "dur": 21.285, + "args": { + "External id": 976122,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937104664.279, "dur": 3.262, + "args": { + "External id": 976123,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345937104682.352, "dur": 24.953, + "args": { + "External id": 976124,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345937104709.379, "dur": 15.018, + "args": { + "External id": 976125,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345937104727.686, "dur": 14.042, + "args": { + "External id": 976126,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345937104743.496, "dur": 14.703, + "args": { + "External id": 976127,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345937104760.267, "dur": 23.338, + "args": { + "External id": 976128,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937104764.935, "dur": 1.973, + "args": { + "External id": 976129,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937104768.871, "dur": 0.536, + "args": { + "External id": 976130,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345937104785.072, "dur": 14.231, + "args": { + "External id": 976131,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937104800.827, "dur": 10.443, + "args": { + "External id": 976132,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937104820.602, "dur": 1.912, + "args": { + "External id": 976133,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937104832.024, "dur": 4.202, + "args": { + "External id": 976134,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937104834.722, "dur": 0.539, + "args": { + "External id": 976135,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937104918.706, "dur": 60.740, + "args": { + "External id": 976136,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937104985.570, "dur": 5.536, + "args": { + "External id": 976137,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937104988.613, "dur": 1.046, + "args": { + "External id": 976138,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345937104992.519, "dur": 49.818, + "args": { + "External id": 976139,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937105049.529, "dur": 48.214, + "args": { + "External id": 976140,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937105051.194, "dur": 45.350, + "args": { + "External id": 976141,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937105093.516, "dur": 1.321, + "args": { + "External id": 976142,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937105101.719, "dur": 58.922, + "args": { + "External id": 976143,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937105103.094, "dur": 56.604, + "args": { + "External id": 976144,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937105165.707, "dur": 19.761, + "args": { + "External id": 976145,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937105193.516, "dur": 4.606, + "args": { + "External id": 976146,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937105196.152, "dur": 0.848, + "args": { + "External id": 976147,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345937105203.088, "dur": 56.504, + "args": { + "External id": 976148,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937105206.641, "dur": 5.705, + "args": { + "External id": 976149,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937105207.832, "dur": 3.797, + "args": { + "External id": 976150,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937105209.262, "dur": 2.145, + "args": { + "External id": 976151,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937105213.371, "dur": 45.816, + "args": { + "External id": 976152,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937105214.349, "dur": 44.176, + "args": { + "External id": 976153,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937105265.062, "dur": 3.998, + "args": { + "External id": 976154,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937105266.900, "dur": 0.741, + "args": { + "External id": 976155,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937105278.803, "dur": 1.857, + "args": { + "External id": 976156,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937105290.084, "dur": 10.399, + "args": { + "External id": 976157,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937105292.573, "dur": 7.567, + "args": { + "External id": 976158,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937105408.908, "dur": 198.847, + "args": { + "External id": 976159,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937105412.410, "dur": 2.423, + "args": { + "External id": 976160,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937105416.165, "dur": 190.995, + "args": { + "External id": 976161,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345937105420.709, "dur": 0.377, + "args": { + "External id": 976162,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345937105423.965, "dur": 25.521, + "args": { + "External id": 976163,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345937105451.186, "dur": 3.252, + "args": { + "External id": 976164,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937105453.522, "dur": 0.664, + "args": { + "External id": 976165,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937105455.581, "dur": 24.596, + "args": { + "External id": 976166,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937105456.569, "dur": 3.289, + "args": { + "External id": 976167,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937105461.339, "dur": 18.531, + "args": { + "External id": 976168,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937105464.329, "dur": 2.807, + "args": { + "External id": 976169,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345937105481.833, "dur": 23.299, + "args": { + "External id": 976170,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345937105506.777, "dur": 13.529, + "args": { + "External id": 976171,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345937105526.340, "dur": 15.470, + "args": { + "External id": 976172,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345937105543.476, "dur": 13.254, + "args": { + "External id": 976173,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345937105558.640, "dur": 20.759, + "args": { + "External id": 976174,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937105560.485, "dur": 1.690, + "args": { + "External id": 976175,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937105564.150, "dur": 0.550, + "args": { + "External id": 976176,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345937105580.828, "dur": 12.573, + "args": { + "External id": 976177,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937105594.630, "dur": 11.236, + "args": { + "External id": 976178,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937105617.698, "dur": 2.027, + "args": { + "External id": 976179,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937105628.844, "dur": 4.011, + "args": { + "External id": 976180,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937105631.350, "dur": 0.533, + "args": { + "External id": 976181,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937105712.142, "dur": 62.479, + "args": { + "External id": 976182,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937105780.669, "dur": 4.991, + "args": { + "External id": 976183,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937105783.583, "dur": 0.824, + "args": { + "External id": 976184,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345937105787.463, "dur": 30.312, + "args": { + "External id": 976185,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937105822.655, "dur": 9.300, + "args": { + "External id": 976186,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937105824.195, "dur": 6.968, + "args": { + "External id": 976187,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937105828.266, "dur": 2.629, + "args": { + "External id": 976188,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937105835.318, "dur": 47.644, + "args": { + "External id": 976189,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937105836.677, "dur": 45.341, + "args": { + "External id": 976190,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937105887.525, "dur": 16.105, + "args": { + "External id": 976191,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937105910.886, "dur": 3.863, + "args": { + "External id": 976192,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937105913.062, "dur": 0.567, + "args": { + "External id": 976193,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345937105921.556, "dur": 57.364, + "args": { + "External id": 976194,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937105922.575, "dur": 6.003, + "args": { + "External id": 976195,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937105923.320, "dur": 4.599, + "args": { + "External id": 976196,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937105924.668, "dur": 3.061, + "args": { + "External id": 976197,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937105929.574, "dur": 48.704, + "args": { + "External id": 976198,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937105930.386, "dur": 47.140, + "args": { + "External id": 976199,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937105984.228, "dur": 4.161, + "args": { + "External id": 976200,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937105986.322, "dur": 0.547, + "args": { + "External id": 976201,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937105996.457, "dur": 1.611, + "args": { + "External id": 976202,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937106006.760, "dur": 29.710, + "args": { + "External id": 976203,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937106028.920, "dur": 6.885, + "args": { + "External id": 976204,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937106200.415, "dur": 211.203, + "args": { + "External id": 976205,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937106203.639, "dur": 3.111, + "args": { + "External id": 976206,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937106213.097, "dur": 197.735, + "args": { + "External id": 976207,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345937106214.869, "dur": 0.540, + "args": { + "External id": 976208,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345937106216.816, "dur": 26.121, + "args": { + "External id": 976209,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345937106244.978, "dur": 4.828, + "args": { + "External id": 976210,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937106247.036, "dur": 2.377, + "args": { + "External id": 976211,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937106251.053, "dur": 23.562, + "args": { + "External id": 976212,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937106252.085, "dur": 1.455, + "args": { + "External id": 976213,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937106254.970, "dur": 19.337, + "args": { + "External id": 976214,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937106258.195, "dur": 3.598, + "args": { + "External id": 976215,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345937106279.435, "dur": 26.210, + "args": { + "External id": 976216,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345937106307.670, "dur": 15.356, + "args": { + "External id": 976217,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345937106326.735, "dur": 15.109, + "args": { + "External id": 976218,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345937106343.460, "dur": 13.600, + "args": { + "External id": 976219,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345937106359.098, "dur": 21.142, + "args": { + "External id": 976220,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937106361.230, "dur": 1.750, + "args": { + "External id": 976221,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937106365.473, "dur": 0.842, + "args": { + "External id": 976222,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345937106384.658, "dur": 12.683, + "args": { + "External id": 976223,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937106398.817, "dur": 10.789, + "args": { + "External id": 976224,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937106419.770, "dur": 2.604, + "args": { + "External id": 976225,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937106433.390, "dur": 4.825, + "args": { + "External id": 976226,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937106436.584, "dur": 0.686, + "args": { + "External id": 976227,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937106515.471, "dur": 71.949, + "args": { + "External id": 976228,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937106593.687, "dur": 6.583, + "args": { + "External id": 976229,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937106596.720, "dur": 2.084, + "args": { + "External id": 976230,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345937106604.753, "dur": 27.819, + "args": { + "External id": 976231,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937106637.724, "dur": 5.911, + "args": { + "External id": 976232,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937106639.555, "dur": 3.338, + "args": { + "External id": 976233,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937106641.751, "dur": 0.931, + "args": { + "External id": 976234,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937106646.864, "dur": 47.135, + "args": { + "External id": 976235,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937106648.138, "dur": 45.107, + "args": { + "External id": 976236,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937106698.850, "dur": 15.556, + "args": { + "External id": 976237,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937106723.585, "dur": 4.067, + "args": { + "External id": 976238,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937106725.785, "dur": 0.720, + "args": { + "External id": 976239,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345937106732.535, "dur": 50.401, + "args": { + "External id": 976240,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937106733.610, "dur": 3.962, + "args": { + "External id": 976241,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937106734.535, "dur": 2.372, + "args": { + "External id": 976242,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937106736.106, "dur": 0.646, + "args": { + "External id": 976243,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937106740.776, "dur": 41.664, + "args": { + "External id": 976244,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937106741.338, "dur": 40.450, + "args": { + "External id": 976245,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937106788.211, "dur": 22.272, + "args": { + "External id": 976246,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937106806.045, "dur": 2.884, + "args": { + "External id": 976247,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937106816.685, "dur": 1.686, + "args": { + "External id": 976248,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937106827.605, "dur": 10.350, + "args": { + "External id": 976249,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937106832.095, "dur": 5.442, + "args": { + "External id": 976250,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937106933.451, "dur": 287.368, + "args": { + "External id": 976251,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937106935.699, "dur": 2.387, + "args": { + "External id": 976252,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937106939.432, "dur": 280.645, + "args": { + "External id": 976253,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345937106941.122, "dur": 0.373, + "args": { + "External id": 976254,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345937106942.739, "dur": 23.793, + "args": { + "External id": 976255,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345937106968.349, "dur": 3.268, + "args": { + "External id": 976256,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937106970.738, "dur": 0.687, + "args": { + "External id": 976257,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937106972.880, "dur": 27.573, + "args": { + "External id": 976258,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937106976.664, "dur": 1.483, + "args": { + "External id": 976259,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937106979.240, "dur": 20.860, + "args": { + "External id": 976260,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937106983.520, "dur": 2.515, + "args": { + "External id": 976261,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345937107001.972, "dur": 52.554, + "args": { + "External id": 976262,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345937107093.180, "dur": 19.785, + "args": { + "External id": 976263,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345937107116.660, "dur": 17.362, + "args": { + "External id": 976264,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345937107135.894, "dur": 14.539, + "args": { + "External id": 976265,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345937107152.993, "dur": 28.784, + "args": { + "External id": 976266,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937107155.694, "dur": 2.574, + "args": { + "External id": 976267,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937107163.071, "dur": 0.728, + "args": { + "External id": 976268,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345937107183.371, "dur": 20.443, + "args": { + "External id": 976269,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937107205.528, "dur": 13.250, + "args": { + "External id": 976270,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937107230.935, "dur": 2.852, + "args": { + "External id": 976271,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937107245.793, "dur": 4.872, + "args": { + "External id": 976272,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937107248.964, "dur": 0.760, + "args": { + "External id": 976273,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937107333.633, "dur": 72.624, + "args": { + "External id": 976274,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937107412.226, "dur": 7.801, + "args": { + "External id": 976275,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937107417.664, "dur": 1.046, + "args": { + "External id": 976276,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345937107421.713, "dur": 30.327, + "args": { + "External id": 976277,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937107457.074, "dur": 6.519, + "args": { + "External id": 976278,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937107458.978, "dur": 3.925, + "args": { + "External id": 976279,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937107461.725, "dur": 0.975, + "args": { + "External id": 976280,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937107466.850, "dur": 46.820, + "args": { + "External id": 976281,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937107468.025, "dur": 44.734, + "args": { + "External id": 976282,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937107521.260, "dur": 17.873, + "args": { + "External id": 976283,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937107545.918, "dur": 4.001, + "args": { + "External id": 976284,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937107548.193, "dur": 0.580, + "args": { + "External id": 976285,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345937107554.380, "dur": 59.223, + "args": { + "External id": 976286,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937107555.273, "dur": 8.630, + "args": { + "External id": 976287,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937107556.080, "dur": 7.118, + "args": { + "External id": 976288,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937107560.393, "dur": 2.605, + "args": { + "External id": 976289,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937107564.597, "dur": 48.507, + "args": { + "External id": 976290,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937107565.399, "dur": 46.750, + "args": { + "External id": 976291,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937107618.642, "dur": 6.469, + "args": { + "External id": 976292,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937107620.977, "dur": 2.690, + "args": { + "External id": 976293,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937107631.181, "dur": 1.534, + "args": { + "External id": 976294,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937107641.647, "dur": 9.530, + "args": { + "External id": 976295,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937107646.731, "dur": 4.136, + "args": { + "External id": 976296,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937107745.606, "dur": 209.501, + "args": { + "External id": 976297,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937107747.971, "dur": 2.337, + "args": { + "External id": 976298,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937107751.861, "dur": 202.596, + "args": { + "External id": 976299,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345937107753.348, "dur": 0.540, + "args": { + "External id": 976300,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345937107757.100, "dur": 23.433, + "args": { + "External id": 976301,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345937107782.409, "dur": 3.087, + "args": { + "External id": 976302,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937107784.579, "dur": 0.633, + "args": { + "External id": 976303,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937107789.182, "dur": 31.135, + "args": { + "External id": 976304,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937107795.646, "dur": 2.661, + "args": { + "External id": 976305,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937107799.718, "dur": 20.314, + "args": { + "External id": 976306,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937107802.605, "dur": 2.914, + "args": { + "External id": 976307,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345937107821.831, "dur": 24.564, + "args": { + "External id": 976308,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345937107847.755, "dur": 14.971, + "args": { + "External id": 976309,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345937107865.995, "dur": 15.648, + "args": { + "External id": 976310,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345937107882.963, "dur": 14.492, + "args": { + "External id": 976311,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345937107899.713, "dur": 24.560, + "args": { + "External id": 976312,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937107904.121, "dur": 1.681, + "args": { + "External id": 976313,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937107907.537, "dur": 0.673, + "args": { + "External id": 976314,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345937107925.669, "dur": 14.539, + "args": { + "External id": 976315,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937107941.524, "dur": 11.921, + "args": { + "External id": 976316,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937107961.968, "dur": 1.909, + "args": { + "External id": 976317,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937107972.585, "dur": 4.152, + "args": { + "External id": 976318,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937107975.114, "dur": 0.677, + "args": { + "External id": 976319,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937108117.902, "dur": 71.210, + "args": { + "External id": 976320,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937108196.094, "dur": 6.759, + "args": { + "External id": 976321,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937108199.643, "dur": 1.362, + "args": { + "External id": 976322,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345937108204.694, "dur": 30.722, + "args": { + "External id": 976323,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937108241.189, "dur": 8.005, + "args": { + "External id": 976324,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937108242.704, "dur": 5.740, + "args": { + "External id": 976325,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937108247.076, "dur": 1.159, + "args": { + "External id": 976326,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937108252.445, "dur": 47.856, + "args": { + "External id": 976327,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937108253.599, "dur": 45.798, + "args": { + "External id": 976328,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937108304.568, "dur": 19.148, + "args": { + "External id": 976329,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937108330.701, "dur": 4.558, + "args": { + "External id": 976330,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937108333.297, "dur": 0.791, + "args": { + "External id": 976331,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345937108339.759, "dur": 57.688, + "args": { + "External id": 976332,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937108342.696, "dur": 10.521, + "args": { + "External id": 976333,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937108343.616, "dur": 8.867, + "args": { + "External id": 976334,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937108351.574, "dur": 0.756, + "args": { + "External id": 976335,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937108354.124, "dur": 42.828, + "args": { + "External id": 976336,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937108354.756, "dur": 41.531, + "args": { + "External id": 976337,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937108402.654, "dur": 6.659, + "args": { + "External id": 976338,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937108404.677, "dur": 3.190, + "args": { + "External id": 976339,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937108418.603, "dur": 1.830, + "args": { + "External id": 976340,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937108429.192, "dur": 7.020, + "args": { + "External id": 976341,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937108431.505, "dur": 4.300, + "args": { + "External id": 976342,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937108533.559, "dur": 209.585, + "args": { + "External id": 976343,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937108536.510, "dur": 2.352, + "args": { + "External id": 976344,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937108540.426, "dur": 202.079, + "args": { + "External id": 976345,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345937108546.421, "dur": 0.296, + "args": { + "External id": 976346,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345937108548.017, "dur": 24.464, + "args": { + "External id": 976347,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345937108574.359, "dur": 2.825, + "args": { + "External id": 976348,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937108576.212, "dur": 0.719, + "args": { + "External id": 976349,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937108578.147, "dur": 25.998, + "args": { + "External id": 976350,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937108581.189, "dur": 1.435, + "args": { + "External id": 976351,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937108583.790, "dur": 20.082, + "args": { + "External id": 976352,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937108586.726, "dur": 2.438, + "args": { + "External id": 976353,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345937108605.704, "dur": 25.330, + "args": { + "External id": 976354,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345937108632.832, "dur": 15.478, + "args": { + "External id": 976355,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345937108653.376, "dur": 15.304, + "args": { + "External id": 976356,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345937108670.417, "dur": 15.519, + "args": { + "External id": 976357,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345937108688.421, "dur": 22.236, + "args": { + "External id": 976358,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937108690.637, "dur": 1.929, + "args": { + "External id": 976359,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937108694.528, "dur": 0.700, + "args": { + "External id": 976360,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345937108712.354, "dur": 14.614, + "args": { + "External id": 976361,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937108728.138, "dur": 13.089, + "args": { + "External id": 976362,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937108752.259, "dur": 1.745, + "args": { + "External id": 976363,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937108763.366, "dur": 3.956, + "args": { + "External id": 976364,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937108765.856, "dur": 0.608, + "args": { + "External id": 976365,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937108836.780, "dur": 57.434, + "args": { + "External id": 976366,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937108899.503, "dur": 6.488, + "args": { + "External id": 976367,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937108902.691, "dur": 2.020, + "args": { + "External id": 976368,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345937108907.537, "dur": 28.437, + "args": { + "External id": 976369,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937108940.911, "dur": 7.913, + "args": { + "External id": 976370,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937108942.614, "dur": 5.488, + "args": { + "External id": 976371,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937108947.043, "dur": 0.861, + "args": { + "External id": 976372,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937108950.951, "dur": 45.683, + "args": { + "External id": 976373,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937108952.088, "dur": 43.840, + "args": { + "External id": 976374,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937109000.555, "dur": 35.681, + "args": { + "External id": 976375,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345937109043.852, "dur": 70.372, + "args": { + "External id": 976376,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345937109046.926, "dur": 66.459, + "args": { + "External id": 976377,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937109053.422, "dur": 1.186, + "args": { + "External id": 976378,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 15422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345937109122.161, "dur": 37.608, + "args": { + "External id": 976379,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 15423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345937109124.467, "dur": 35.051, + "args": { + "External id": 976380,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], [], []], "Ev Idx": 15424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937109132.987, "dur": 5.103, + "args": { + "External id": 976381,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345937109139.706, "dur": 19.322, + "args": { + "External id": 976382,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2338710, + "ts": 6345937109173.941, "dur": 6.757, + "args": { + "External id": 976383,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 15427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2338710, + "ts": 6345937109177.173, "dur": 3.239, + "args": { + "External id": 976384,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 15428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2338710, + "ts": 6345937109245.697, "dur": 1.467, + "args": { + "External id": 976385,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2338710, + "ts": 6345937109246.181, "dur": 0.752, + "args": { + "External id": 976386,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937109297.823, "dur": 32.285, + "args": { + "External id": 976387,"Sequence number": 10552450, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 15431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937109332.376, "dur": 16.336, + "args": { + "External id": 976388,"Sequence number": 10552451, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 15432 + } + }, + { + "ph": "s", "id": 16, "pid": 2338710, "tid": 2338710, "ts": 6345937109332.376, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937109356.955, "dur": 7.685, + "args": { + "External id": 976389,"Sequence number": 10552452, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4, 4096], [], [], [], []], "Ev Idx": 15433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937109361.167, "dur": 1.679, + "args": { + "External id": 976390,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 15434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338710, "tid": 2338710, + "ts": 6345937109367.627, "dur": 7.006, + "args": { + "External id": 976391,"Sequence number": 10552452, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "2"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4, 4096], [], []], "Ev Idx": 15435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937109372.564, "dur": 0.442, + "args": { + "External id": 976392,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "3"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 15436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937109376.005, "dur": 4.484, + "args": { + "External id": 976393,"Sequence number": 10552452, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 15437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937109379.523, "dur": 0.306, + "args": { + "External id": 976394,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "3"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 15438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937109385.221, "dur": 6.411, + "args": { + "External id": 976395,"Sequence number": 10552452, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 15439 + } + }, + { + "ph": "s", "id": 15, "pid": 2338710, "tid": 2338710, "ts": 6345937109385.221, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937109389.025, "dur": 0.959, + "args": { + "External id": 976396,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 15440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937109392.786, "dur": 6.601, + "args": { + "External id": 976397,"Sequence number": 10552453, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 15441 + } + }, + { + "ph": "s", "id": 14, "pid": 2338710, "tid": 2338710, "ts": 6345937109392.786, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937109398.126, "dur": 0.327, + "args": { + "External id": 976398,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 15442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338710, "tid": 2338710, + "ts": 6345937109402.904, "dur": 5.983, + "args": { + "External id": 976399,"Sequence number": 10552454, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "2"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 15443 + } + }, + { + "ph": "s", "id": 13, "pid": 2338710, "tid": 2338710, "ts": 6345937109402.904, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937109407.106, "dur": 0.785, + "args": { + "External id": 976400,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "8192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 15444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937109410.136, "dur": 7.267, + "args": { + "External id": 976401,"Sequence number": 10552455, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 15445 + } + }, + { + "ph": "s", "id": 12, "pid": 2338710, "tid": 2338710, "ts": 6345937109410.136, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937109412.629, "dur": 3.722, + "args": { + "External id": 976402,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "8192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 15446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338710, "tid": 2338710, + "ts": 6345937109421.952, "dur": 40.861, + "args": { + "External id": 976403,"Sequence number": 10552456, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 15447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338710, "tid": 2338710, + "ts": 6345937109423.686, "dur": 38.885, + "args": { + "External id": 976404,"Sequence number": 10552456, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 15448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345937109426.458, "dur": 8.289, + "args": { + "External id": 976405,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 15449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937109429.260, "dur": 4.645, + "args": { + "External id": 976406,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345937109438.092, "dur": 23.881, + "args": { + "External id": 976407,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 15451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937109494.766, "dur": 4.700, + "args": { + "External id": 976408,"Sequence number": 10552456, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 15452 + } + }, + { + "ph": "s", "id": 11, "pid": 2338710, "tid": 2338710, "ts": 6345937109494.766, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937109502.153, "dur": 2.423, + "args": { + "External id": 976409,"Sequence number": 10552457, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 15453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345937109541.658, "dur": 44839.573, + "args": { + "External id": 976410,"Sequence number": 10552457, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16384, 1], [1], [4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768], [32000, 4096], [], [], [], [], []], "Ev Idx": 15454 + } + }, + { + "ph": "s", "id": 10, "pid": 2338710, "tid": 2338710, "ts": 6345937109541.658, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338710, "tid": 2338710, + "ts": 6345937109559.770, "dur": 31.987, + "args": { + "External id": 976411,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 15455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338710, "tid": 2338710, + "ts": 6345937109560.604, "dur": 30.916, + "args": { + "External id": 976412,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 15456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345937109562.002, "dur": 7.658, + "args": { + "External id": 976413,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[16384, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 15457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937109565.676, "dur": 3.461, + "args": { + "External id": 976414,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345937109570.397, "dur": 20.589, + "args": { + "External id": 976415,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [16384, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 15459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338710, "tid": 2338710, + "ts": 6345937109609.880, "dur": 29.823, + "args": { + "External id": 976416,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 15460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345937109610.953, "dur": 5.934, + "args": { + "External id": 976417,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 15461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937109613.109, "dur": 3.458, + "args": { + "External id": 976418,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2338710, + "ts": 6345937109618.533, "dur": 20.907, + "args": { + "External id": 976419,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 15463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345937109620.561, "dur": 18.446, + "args": { + "External id": 976420,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 15464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338710, "tid": 2338710, + "ts": 6345937109643.992, "dur": 22.324, + "args": { + "External id": 976421,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 15465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345937109644.796, "dur": 4.161, + "args": { + "External id": 976422,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 15466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937109645.907, "dur": 2.736, + "args": { + "External id": 976423,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2338710, + "ts": 6345937109652.251, "dur": 13.828, + "args": { + "External id": 976424,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345937109652.646, "dur": 12.994, + "args": { + "External id": 976425,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 15469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338710, "tid": 2338710, + "ts": 6345937109673.380, "dur": 20.413, + "args": { + "External id": 976426,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 15470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937109675.244, "dur": 4.841, + "args": { + "External id": 976427,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2338710, + "ts": 6345937109680.798, "dur": 12.562, + "args": { + "External id": 976428,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[32768]], "Ev Idx": 15472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345937109681.420, "dur": 11.581, + "args": { + "External id": 976429,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338710, "tid": 2338710, + "ts": 6345937109699.240, "dur": 30.028, + "args": { + "External id": 976430,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345937109732.399, "dur": 59.453, + "args": { + "External id": 976431,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345937109737.114, "dur": 54.164, + "args": { + "External id": 976432,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937109742.544, "dur": 0.775, + "args": { + "External id": 976433,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 15477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345937109744.494, "dur": 24.311, + "args": { + "External id": 976434,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345937109745.955, "dur": 22.609, + "args": { + "External id": 976435,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[32768], [], [], [], [], [], []], "Ev Idx": 15479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937109749.776, "dur": 3.057, + "args": { + "External id": 976436,"Record function id": 0, "Concrete Inputs": ["[32768]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345937109753.848, "dur": 14.227, + "args": { + "External id": 976437,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[32768], [32768], []], "Ev Idx": 15481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338710, "tid": 2338710, + "ts": 6345937109795.807, "dur": 37856.724, + "args": { + "External id": 976438,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 15482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338710, "tid": 2338710, + "ts": 6345937109797.323, "dur": 37853.951, + "args": { + "External id": 976439,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 15483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937147665.591, "dur": 8.153, + "args": { + "External id": 976440,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937147670.729, "dur": 1.164, + "args": { + "External id": 976441,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345937147682.454, "dur": 128.294, + "args": { + "External id": 976442,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937147684.302, "dur": 6.528, + "args": { + "External id": 976443,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937147686.945, "dur": 2.936, + "args": { + "External id": 976444,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937147688.600, "dur": 0.938, + "args": { + "External id": 976445,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937147692.396, "dur": 117.435, + "args": { + "External id": 976446,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937147694.273, "dur": 114.290, + "args": { + "External id": 976447,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937147816.735, "dur": 6.380, + "args": { + "External id": 976448,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937147820.404, "dur": 0.882, + "args": { + "External id": 976449,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937147834.382, "dur": 3.949, + "args": { + "External id": 976450,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937147850.861, "dur": 8.367, + "args": { + "External id": 976451,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937147854.403, "dur": 4.494, + "args": { + "External id": 976452,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937148083.815, "dur": 250.590, + "args": { + "External id": 976453,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937148089.892, "dur": 5.559, + "args": { + "External id": 976454,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937148101.443, "dur": 232.302, + "args": { + "External id": 976455,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345937148105.991, "dur": 1.600, + "args": { + "External id": 976456,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345937148109.222, "dur": 36.244, + "args": { + "External id": 976457,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345937148147.625, "dur": 4.618, + "args": { + "External id": 976458,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937148150.624, "dur": 1.253, + "args": { + "External id": 976459,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937148153.244, "dur": 30.436, + "args": { + "External id": 976460,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937148156.460, "dur": 1.717, + "args": { + "External id": 976461,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937148159.718, "dur": 23.636, + "args": { + "External id": 976462,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937148164.181, "dur": 4.380, + "args": { + "External id": 976463,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345937148188.045, "dur": 26.033, + "args": { + "External id": 976464,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345937148216.435, "dur": 17.040, + "args": { + "External id": 976465,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345937148237.125, "dur": 18.505, + "args": { + "External id": 976466,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345937148257.525, "dur": 15.413, + "args": { + "External id": 976467,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345937148275.189, "dur": 23.061, + "args": { + "External id": 976468,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937148277.776, "dur": 1.767, + "args": { + "External id": 976469,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937148281.966, "dur": 0.538, + "args": { + "External id": 976470,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345937148302.818, "dur": 14.745, + "args": { + "External id": 976471,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937148319.218, "dur": 13.411, + "args": { + "External id": 976472,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937148344.318, "dur": 3.001, + "args": { + "External id": 976473,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937148356.186, "dur": 5.541, + "args": { + "External id": 976474,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937148360.149, "dur": 0.505, + "args": { + "External id": 976475,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937148459.662, "dur": 90.445, + "args": { + "External id": 976476,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937148557.010, "dur": 9.842, + "args": { + "External id": 976477,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937148560.206, "dur": 3.550, + "args": { + "External id": 976478,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345937148570.787, "dur": 35.668, + "args": { + "External id": 976479,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937148612.963, "dur": 8.393, + "args": { + "External id": 976480,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937148615.111, "dur": 5.377, + "args": { + "External id": 976481,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937148618.885, "dur": 1.272, + "args": { + "External id": 976482,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937148625.232, "dur": 58.783, + "args": { + "External id": 976483,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937148626.669, "dur": 56.639, + "args": { + "External id": 976484,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937148689.053, "dur": 18.048, + "args": { + "External id": 976485,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937148716.987, "dur": 5.360, + "args": { + "External id": 976486,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937148720.558, "dur": 0.667, + "args": { + "External id": 976487,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345937148727.532, "dur": 54.335, + "args": { + "External id": 976488,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937148728.615, "dur": 4.074, + "args": { + "External id": 976489,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937148729.973, "dur": 2.119, + "args": { + "External id": 976490,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937148731.357, "dur": 0.577, + "args": { + "External id": 976491,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937148735.653, "dur": 45.779, + "args": { + "External id": 976492,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937148736.525, "dur": 44.328, + "args": { + "External id": 976493,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937148786.698, "dur": 4.564, + "args": { + "External id": 976494,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937148789.192, "dur": 0.641, + "args": { + "External id": 976495,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937148797.579, "dur": 1.691, + "args": { + "External id": 976496,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937148809.086, "dur": 11.091, + "args": { + "External id": 976497,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937148813.736, "dur": 6.110, + "args": { + "External id": 976498,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937148934.881, "dur": 274.342, + "args": { + "External id": 976499,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937148938.140, "dur": 3.580, + "args": { + "External id": 976500,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937148943.422, "dur": 265.096, + "args": { + "External id": 976501,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345937148945.205, "dur": 0.409, + "args": { + "External id": 976502,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345937148946.857, "dur": 23.807, + "args": { + "External id": 976503,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345937148972.594, "dur": 7.209, + "args": { + "External id": 976504,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937148976.828, "dur": 2.695, + "args": { + "External id": 976505,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937148983.357, "dur": 22.609, + "args": { + "External id": 976506,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937148984.729, "dur": 1.279, + "args": { + "External id": 976507,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937148987.310, "dur": 18.347, + "args": { + "External id": 976508,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937148990.409, "dur": 2.324, + "args": { + "External id": 976509,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345937149025.757, "dur": 67.308, + "args": { + "External id": 976510,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345937149096.701, "dur": 18.840, + "args": { + "External id": 976511,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345937149119.147, "dur": 15.166, + "args": { + "External id": 976512,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345937149136.117, "dur": 13.241, + "args": { + "External id": 976513,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345937149151.919, "dur": 27.409, + "args": { + "External id": 976514,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937149156.247, "dur": 2.586, + "args": { + "External id": 976515,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937149163.354, "dur": 0.862, + "args": { + "External id": 976516,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345937149181.109, "dur": 13.003, + "args": { + "External id": 976517,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937149195.639, "dur": 11.449, + "args": { + "External id": 976518,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937149219.461, "dur": 3.084, + "args": { + "External id": 976519,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937149234.705, "dur": 5.388, + "args": { + "External id": 976520,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937149237.486, "dur": 1.160, + "args": { + "External id": 976521,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937149330.952, "dur": 75.998, + "args": { + "External id": 976522,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937149413.088, "dur": 8.191, + "args": { + "External id": 976523,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937149419.148, "dur": 0.710, + "args": { + "External id": 976524,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345937149423.020, "dur": 30.164, + "args": { + "External id": 976525,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937149458.102, "dur": 6.480, + "args": { + "External id": 976526,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937149459.859, "dur": 3.952, + "args": { + "External id": 976527,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937149461.662, "dur": 1.785, + "args": { + "External id": 976528,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937149467.706, "dur": 55.549, + "args": { + "External id": 976529,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937149471.459, "dur": 51.137, + "args": { + "External id": 976530,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937149528.222, "dur": 17.073, + "args": { + "External id": 976531,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937149551.899, "dur": 3.903, + "args": { + "External id": 976532,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937149554.112, "dur": 0.648, + "args": { + "External id": 976533,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345937149560.439, "dur": 55.542, + "args": { + "External id": 976534,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937149561.411, "dur": 6.026, + "args": { + "External id": 976535,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937149562.459, "dur": 4.310, + "args": { + "External id": 976536,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937149566.083, "dur": 0.539, + "args": { + "External id": 976537,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937149568.363, "dur": 47.267, + "args": { + "External id": 976538,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937149569.300, "dur": 45.630, + "args": { + "External id": 976539,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937149620.746, "dur": 5.731, + "args": { + "External id": 976540,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937149622.650, "dur": 2.444, + "args": { + "External id": 976541,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937149646.823, "dur": 1.712, + "args": { + "External id": 976542,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937149659.228, "dur": 10.604, + "args": { + "External id": 976543,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937149663.386, "dur": 6.046, + "args": { + "External id": 976544,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937149781.339, "dur": 209.784, + "args": { + "External id": 976545,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937149783.888, "dur": 2.599, + "args": { + "External id": 976546,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937149788.294, "dur": 202.203, + "args": { + "External id": 976547,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345937149790.009, "dur": 0.328, + "args": { + "External id": 976548,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345937149791.554, "dur": 26.747, + "args": { + "External id": 976549,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345937149820.470, "dur": 3.240, + "args": { + "External id": 976550,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937149822.728, "dur": 0.733, + "args": { + "External id": 976551,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937149827.472, "dur": 27.154, + "args": { + "External id": 976552,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937149828.807, "dur": 1.781, + "args": { + "External id": 976553,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937149831.716, "dur": 22.575, + "args": { + "External id": 976554,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937149836.204, "dur": 2.687, + "args": { + "External id": 976555,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345937149856.345, "dur": 26.352, + "args": { + "External id": 976556,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345937149884.716, "dur": 16.538, + "args": { + "External id": 976557,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345937149904.423, "dur": 14.940, + "args": { + "External id": 976558,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345937149920.768, "dur": 12.420, + "args": { + "External id": 976559,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345937149935.352, "dur": 24.130, + "args": { + "External id": 976560,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937149939.949, "dur": 2.082, + "args": { + "External id": 976561,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937149943.872, "dur": 0.680, + "args": { + "External id": 976562,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345937149960.952, "dur": 14.803, + "args": { + "External id": 976563,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937149976.828, "dur": 12.609, + "args": { + "External id": 976564,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937149998.509, "dur": 1.851, + "args": { + "External id": 976565,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937150029.851, "dur": 7.030, + "args": { + "External id": 976566,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937150034.474, "dur": 0.936, + "args": { + "External id": 976567,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937150161.945, "dur": 73.598, + "args": { + "External id": 976568,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937150242.340, "dur": 6.756, + "args": { + "External id": 976569,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937150245.883, "dur": 1.259, + "args": { + "External id": 976570,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345937150250.520, "dur": 33.637, + "args": { + "External id": 976571,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937150289.682, "dur": 7.733, + "args": { + "External id": 976572,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937150291.403, "dur": 5.185, + "args": { + "External id": 976573,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937150295.540, "dur": 0.788, + "args": { + "External id": 976574,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937150300.523, "dur": 49.878, + "args": { + "External id": 976575,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937150302.018, "dur": 47.779, + "args": { + "External id": 976576,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937150355.113, "dur": 19.177, + "args": { + "External id": 976577,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937150381.025, "dur": 3.911, + "args": { + "External id": 976578,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937150383.313, "dur": 0.533, + "args": { + "External id": 976579,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345937150389.392, "dur": 54.685, + "args": { + "External id": 976580,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937150393.009, "dur": 5.639, + "args": { + "External id": 976581,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937150394.013, "dur": 3.854, + "args": { + "External id": 976582,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937150395.478, "dur": 2.107, + "args": { + "External id": 976583,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937150399.345, "dur": 44.370, + "args": { + "External id": 976584,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937150400.032, "dur": 42.855, + "args": { + "External id": 976585,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937150448.989, "dur": 9.361, + "args": { + "External id": 976586,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937150456.692, "dur": 0.465, + "args": { + "External id": 976587,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937150466.545, "dur": 1.714, + "args": { + "External id": 976588,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937150477.632, "dur": 9.176, + "args": { + "External id": 976589,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937150479.750, "dur": 6.743, + "args": { + "External id": 976590,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937150588.626, "dur": 197.618, + "args": { + "External id": 976591,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937150592.329, "dur": 2.356, + "args": { + "External id": 976592,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937150599.308, "dur": 186.460, + "args": { + "External id": 976593,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345937150600.932, "dur": 0.545, + "args": { + "External id": 976594,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345937150602.822, "dur": 23.553, + "args": { + "External id": 976595,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345937150628.231, "dur": 5.166, + "args": { + "External id": 976596,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937150632.332, "dur": 0.796, + "args": { + "External id": 976597,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937150634.367, "dur": 22.669, + "args": { + "External id": 976598,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937150635.626, "dur": 1.546, + "args": { + "External id": 976599,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937150638.403, "dur": 18.315, + "args": { + "External id": 976600,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937150641.568, "dur": 2.745, + "args": { + "External id": 976601,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345937150658.519, "dur": 21.086, + "args": { + "External id": 976602,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345937150681.379, "dur": 16.409, + "args": { + "External id": 976603,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345937150702.999, "dur": 13.954, + "args": { + "External id": 976604,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345937150718.709, "dur": 13.844, + "args": { + "External id": 976605,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345937150734.548, "dur": 22.020, + "args": { + "External id": 976606,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937150736.932, "dur": 1.926, + "args": { + "External id": 976607,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937150741.123, "dur": 0.704, + "args": { + "External id": 976608,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345937150758.188, "dur": 12.564, + "args": { + "External id": 976609,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937150774.430, "dur": 10.244, + "args": { + "External id": 976610,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937150793.786, "dur": 1.842, + "args": { + "External id": 976611,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937150805.617, "dur": 3.764, + "args": { + "External id": 976612,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937150808.119, "dur": 0.374, + "args": { + "External id": 976613,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937150884.105, "dur": 62.928, + "args": { + "External id": 976614,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937150952.825, "dur": 5.539, + "args": { + "External id": 976615,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937150956.308, "dur": 0.734, + "args": { + "External id": 976616,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345937150960.130, "dur": 26.149, + "args": { + "External id": 976617,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937150991.307, "dur": 7.435, + "args": { + "External id": 976618,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937150995.033, "dur": 3.011, + "args": { + "External id": 976619,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937150996.564, "dur": 1.261, + "args": { + "External id": 976620,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937151001.470, "dur": 114.459, + "args": { + "External id": 976621,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937151002.994, "dur": 111.647, + "args": { + "External id": 976622,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937151123.182, "dur": 18.151, + "args": { + "External id": 976623,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937151149.211, "dur": 8.103, + "args": { + "External id": 976624,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937151154.747, "dur": 1.046, + "args": { + "External id": 976625,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345937151162.030, "dur": 57.448, + "args": { + "External id": 976626,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937151163.033, "dur": 6.875, + "args": { + "External id": 976627,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937151164.196, "dur": 5.076, + "args": { + "External id": 976628,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937151165.838, "dur": 3.265, + "args": { + "External id": 976629,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937151170.746, "dur": 48.234, + "args": { + "External id": 976630,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937151171.568, "dur": 46.639, + "args": { + "External id": 976631,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937151224.143, "dur": 3.923, + "args": { + "External id": 976632,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937151226.373, "dur": 0.429, + "args": { + "External id": 976633,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937151238.173, "dur": 1.789, + "args": { + "External id": 976634,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937151250.387, "dur": 8.324, + "args": { + "External id": 976635,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937151252.551, "dur": 5.816, + "args": { + "External id": 976636,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937151366.986, "dur": 215.261, + "args": { + "External id": 976637,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937151369.381, "dur": 2.032, + "args": { + "External id": 976638,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937151375.674, "dur": 206.034, + "args": { + "External id": 976639,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345937151377.293, "dur": 0.385, + "args": { + "External id": 976640,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345937151378.722, "dur": 29.944, + "args": { + "External id": 976641,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345937151410.370, "dur": 3.198, + "args": { + "External id": 976642,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937151412.615, "dur": 0.648, + "args": { + "External id": 976643,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937151414.520, "dur": 26.102, + "args": { + "External id": 976644,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937151415.775, "dur": 1.563, + "args": { + "External id": 976645,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937151418.300, "dur": 22.032, + "args": { + "External id": 976646,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937151422.988, "dur": 3.209, + "args": { + "External id": 976647,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345937151444.623, "dur": 24.008, + "args": { + "External id": 976648,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345937151470.664, "dur": 13.827, + "args": { + "External id": 976649,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345937151487.613, "dur": 18.297, + "args": { + "External id": 976650,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345937151507.480, "dur": 14.028, + "args": { + "External id": 976651,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345937151523.541, "dur": 24.535, + "args": { + "External id": 976652,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937151526.032, "dur": 2.004, + "args": { + "External id": 976653,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937151530.168, "dur": 0.739, + "args": { + "External id": 976654,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345937151552.424, "dur": 13.904, + "args": { + "External id": 976655,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937151567.728, "dur": 12.775, + "args": { + "External id": 976656,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937151590.074, "dur": 1.849, + "args": { + "External id": 976657,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937151601.927, "dur": 3.909, + "args": { + "External id": 976658,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937151604.391, "dur": 0.431, + "args": { + "External id": 976659,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937151680.714, "dur": 61.905, + "args": { + "External id": 976660,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937151749.271, "dur": 5.018, + "args": { + "External id": 976661,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937151752.092, "dur": 0.949, + "args": { + "External id": 976662,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345937151758.300, "dur": 30.160, + "args": { + "External id": 976663,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937151793.484, "dur": 5.609, + "args": { + "External id": 976664,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937151795.358, "dur": 3.061, + "args": { + "External id": 976665,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937151797.128, "dur": 1.094, + "args": { + "External id": 976666,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937151801.929, "dur": 46.612, + "args": { + "External id": 976667,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937151803.311, "dur": 44.397, + "args": { + "External id": 976668,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937151853.131, "dur": 17.179, + "args": { + "External id": 976669,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937151879.088, "dur": 3.916, + "args": { + "External id": 976670,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937151881.244, "dur": 0.777, + "args": { + "External id": 976671,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345937151887.418, "dur": 55.228, + "args": { + "External id": 976672,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937151888.171, "dur": 5.642, + "args": { + "External id": 976673,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937151889.142, "dur": 4.011, + "args": { + "External id": 976674,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937151890.600, "dur": 2.255, + "args": { + "External id": 976675,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937151897.072, "dur": 45.151, + "args": { + "External id": 976676,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937151897.995, "dur": 43.518, + "args": { + "External id": 976677,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937151947.594, "dur": 6.409, + "args": { + "External id": 976678,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937151949.718, "dur": 3.031, + "args": { + "External id": 976679,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937151959.209, "dur": 1.565, + "args": { + "External id": 976680,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937151969.098, "dur": 9.092, + "args": { + "External id": 976681,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937151973.612, "dur": 4.259, + "args": { + "External id": 976682,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937152139.007, "dur": 213.857, + "args": { + "External id": 976683,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937152142.357, "dur": 3.801, + "args": { + "External id": 976684,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937152148.052, "dur": 204.141, + "args": { + "External id": 976685,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345937152149.665, "dur": 0.458, + "args": { + "External id": 976686,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345937152152.971, "dur": 26.770, + "args": { + "External id": 976687,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345937152181.523, "dur": 3.195, + "args": { + "External id": 976688,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937152183.466, "dur": 0.897, + "args": { + "External id": 976689,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937152188.187, "dur": 26.539, + "args": { + "External id": 976690,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937152189.297, "dur": 3.452, + "args": { + "External id": 976691,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937152194.149, "dur": 20.259, + "args": { + "External id": 976692,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937152197.332, "dur": 2.617, + "args": { + "External id": 976693,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345937152216.371, "dur": 24.668, + "args": { + "External id": 976694,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345937152243.093, "dur": 15.541, + "args": { + "External id": 976695,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345937152261.858, "dur": 15.006, + "args": { + "External id": 976696,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345937152278.506, "dur": 14.983, + "args": { + "External id": 976697,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345937152295.632, "dur": 25.682, + "args": { + "External id": 976698,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937152297.894, "dur": 2.030, + "args": { + "External id": 976699,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937152303.632, "dur": 0.836, + "args": { + "External id": 976700,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345937152323.009, "dur": 14.655, + "args": { + "External id": 976701,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937152338.971, "dur": 12.007, + "args": { + "External id": 976702,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937152361.165, "dur": 2.121, + "args": { + "External id": 976703,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937152374.561, "dur": 4.160, + "args": { + "External id": 976704,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937152377.051, "dur": 0.527, + "args": { + "External id": 976705,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937152454.930, "dur": 67.851, + "args": { + "External id": 976706,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937152528.735, "dur": 7.609, + "args": { + "External id": 976707,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937152534.238, "dur": 0.952, + "args": { + "External id": 976708,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345937152538.198, "dur": 30.747, + "args": { + "External id": 976709,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937152573.738, "dur": 7.419, + "args": { + "External id": 976710,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937152575.249, "dur": 5.051, + "args": { + "External id": 976711,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937152577.308, "dur": 2.730, + "args": { + "External id": 976712,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937152584.117, "dur": 50.216, + "args": { + "External id": 976713,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937152587.375, "dur": 46.292, + "args": { + "External id": 976714,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937152639.020, "dur": 16.696, + "args": { + "External id": 976715,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937152662.176, "dur": 3.913, + "args": { + "External id": 976716,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937152664.266, "dur": 0.829, + "args": { + "External id": 976717,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345937152670.692, "dur": 51.750, + "args": { + "External id": 976718,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937152671.653, "dur": 5.832, + "args": { + "External id": 976719,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937152672.731, "dur": 4.195, + "args": { + "External id": 976720,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937152676.110, "dur": 0.496, + "args": { + "External id": 976721,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937152678.118, "dur": 43.826, + "args": { + "External id": 976722,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937152679.041, "dur": 42.235, + "args": { + "External id": 976723,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937152727.298, "dur": 4.267, + "args": { + "External id": 976724,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937152729.634, "dur": 0.635, + "args": { + "External id": 976725,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937152737.326, "dur": 1.480, + "args": { + "External id": 976726,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937152748.018, "dur": 10.828, + "args": { + "External id": 976727,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937152752.264, "dur": 6.293, + "args": { + "External id": 976728,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937152850.621, "dur": 268.645, + "args": { + "External id": 976729,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937152852.312, "dur": 2.093, + "args": { + "External id": 976730,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937152857.653, "dur": 260.960, + "args": { + "External id": 976731,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345937152858.899, "dur": 0.333, + "args": { + "External id": 976732,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345937152860.215, "dur": 22.376, + "args": { + "External id": 976733,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345937152884.498, "dur": 4.155, + "args": { + "External id": 976734,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937152886.527, "dur": 1.881, + "args": { + "External id": 976735,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937152891.789, "dur": 23.557, + "args": { + "External id": 976736,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937152893.046, "dur": 1.423, + "args": { + "External id": 976737,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937152895.562, "dur": 19.505, + "args": { + "External id": 976738,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937152898.332, "dur": 2.594, + "args": { + "External id": 976739,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345937152917.089, "dur": 21.517, + "args": { + "External id": 976740,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345937152940.068, "dur": 14.316, + "args": { + "External id": 976741,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345937152957.410, "dur": 16.186, + "args": { + "External id": 976742,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345937152975.150, "dur": 14.276, + "args": { + "External id": 976743,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345937152991.239, "dur": 45.742, + "args": { + "External id": 976744,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937152995.686, "dur": 1.758, + "args": { + "External id": 976745,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937152999.370, "dur": 0.719, + "args": { + "External id": 976746,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345937153039.863, "dur": 56.760, + "args": { + "External id": 976747,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937153099.005, "dur": 17.778, + "args": { + "External id": 976748,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937153129.080, "dur": 2.537, + "args": { + "External id": 976749,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937153142.405, "dur": 5.921, + "args": { + "External id": 976750,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937153145.368, "dur": 0.708, + "args": { + "External id": 976751,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937153228.354, "dur": 72.925, + "args": { + "External id": 976752,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937153307.483, "dur": 6.745, + "args": { + "External id": 976753,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937153310.427, "dur": 2.360, + "args": { + "External id": 976754,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345937153316.062, "dur": 31.650, + "args": { + "External id": 976755,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937153353.032, "dur": 8.231, + "args": { + "External id": 976756,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937153354.943, "dur": 5.454, + "args": { + "External id": 976757,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937153359.319, "dur": 0.886, + "args": { + "External id": 976758,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937153363.902, "dur": 48.588, + "args": { + "External id": 976759,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937153365.171, "dur": 46.543, + "args": { + "External id": 976760,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937153416.901, "dur": 18.397, + "args": { + "External id": 976761,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937153442.062, "dur": 3.966, + "args": { + "External id": 976762,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937153444.396, "dur": 0.556, + "args": { + "External id": 976763,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345937153450.610, "dur": 58.099, + "args": { + "External id": 976764,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937153454.088, "dur": 4.384, + "args": { + "External id": 976765,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937153455.148, "dur": 2.632, + "args": { + "External id": 976766,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937153456.749, "dur": 0.885, + "args": { + "External id": 976767,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937153459.118, "dur": 49.081, + "args": { + "External id": 976768,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937153460.353, "dur": 47.059, + "args": { + "External id": 976769,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937153513.482, "dur": 3.974, + "args": { + "External id": 976770,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937153515.476, "dur": 0.553, + "args": { + "External id": 976771,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937153525.199, "dur": 1.463, + "args": { + "External id": 976772,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937153535.479, "dur": 7.555, + "args": { + "External id": 976773,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937153537.894, "dur": 4.823, + "args": { + "External id": 976774,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937153637.302, "dur": 234.832, + "args": { + "External id": 976775,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937153639.531, "dur": 3.675, + "args": { + "External id": 976776,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937153647.143, "dur": 224.246, + "args": { + "External id": 976777,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345937153648.464, "dur": 0.501, + "args": { + "External id": 976778,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345937153650.287, "dur": 25.610, + "args": { + "External id": 976779,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345937153677.509, "dur": 4.699, + "args": { + "External id": 976780,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937153681.333, "dur": 0.617, + "args": { + "External id": 976781,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937153683.261, "dur": 43.834, + "args": { + "External id": 976782,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937153684.448, "dur": 1.370, + "args": { + "External id": 976783,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937153686.854, "dur": 39.925, + "args": { + "External id": 976784,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937153708.767, "dur": 4.041, + "args": { + "External id": 976785,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345937153728.782, "dur": 26.194, + "args": { + "External id": 976786,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345937153756.673, "dur": 17.913, + "args": { + "External id": 976787,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345937153779.478, "dur": 15.777, + "args": { + "External id": 976788,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345937153796.865, "dur": 14.942, + "args": { + "External id": 976789,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345937153813.819, "dur": 25.432, + "args": { + "External id": 976790,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937153817.834, "dur": 2.001, + "args": { + "External id": 976791,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937153821.825, "dur": 0.796, + "args": { + "External id": 976792,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345937153840.739, "dur": 14.383, + "args": { + "External id": 976793,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937153858.726, "dur": 11.268, + "args": { + "External id": 976794,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937153880.200, "dur": 1.754, + "args": { + "External id": 976795,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937153891.476, "dur": 4.038, + "args": { + "External id": 976796,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937153894.035, "dur": 0.487, + "args": { + "External id": 976797,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937153970.543, "dur": 123.458, + "args": { + "External id": 976798,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937154103.644, "dur": 7.313, + "args": { + "External id": 976799,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937154108.033, "dur": 1.116, + "args": { + "External id": 976800,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345937154112.996, "dur": 36.855, + "args": { + "External id": 976801,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937154156.454, "dur": 9.094, + "args": { + "External id": 976802,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937154161.009, "dur": 3.468, + "args": { + "External id": 976803,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937154162.999, "dur": 1.244, + "args": { + "External id": 976804,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937154169.695, "dur": 58.259, + "args": { + "External id": 976805,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937154171.159, "dur": 55.984, + "args": { + "External id": 976806,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937154232.925, "dur": 19.005, + "args": { + "External id": 976807,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345937154257.390, "dur": 28.948, + "args": { + "External id": 976808,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345937154260.077, "dur": 25.742, + "args": { + "External id": 976809,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937154266.583, "dur": 0.663, + "args": { + "External id": 976810,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 15854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345937154294.734, "dur": 34.417, + "args": { + "External id": 976811,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 15855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345937154296.871, "dur": 32.007, + "args": { + "External id": 976812,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], [], []], "Ev Idx": 15856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937154302.453, "dur": 4.118, + "args": { + "External id": 976813,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345937154307.757, "dur": 20.520, + "args": { + "External id": 976814,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2338710, + "ts": 6345937154341.986, "dur": 5.576, + "args": { + "External id": 976815,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 15859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2338710, + "ts": 6345937154344.157, "dur": 3.086, + "args": { + "External id": 976816,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 15860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2338710, + "ts": 6345937154348.790, "dur": 3.953, + "args": { + "External id": 976817,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2338710, + "ts": 6345937154352.065, "dur": 0.570, + "args": { + "External id": 976818,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937154402.454, "dur": 25.980, + "args": { + "External id": 976819,"Sequence number": 10552458, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 15863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937154430.674, "dur": 16.101, + "args": { + "External id": 976820,"Sequence number": 10552459, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 15864 + } + }, + { + "ph": "s", "id": 9, "pid": 2338710, "tid": 2338710, "ts": 6345937154430.674, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937154454.022, "dur": 6.343, + "args": { + "External id": 976821,"Sequence number": 10552460, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4, 4096], [], [], [], []], "Ev Idx": 15865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937154457.608, "dur": 1.204, + "args": { + "External id": 976822,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 15866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338710, "tid": 2338710, + "ts": 6345937154463.224, "dur": 8.956, + "args": { + "External id": 976823,"Sequence number": 10552460, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "3"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4, 4096], [], []], "Ev Idx": 15867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937154470.187, "dur": 0.442, + "args": { + "External id": 976824,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "4"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 15868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937154473.861, "dur": 2.666, + "args": { + "External id": 976825,"Sequence number": 10552460, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 15869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937154475.315, "dur": 0.519, + "args": { + "External id": 976826,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "4"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 15870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937154480.985, "dur": 5.912, + "args": { + "External id": 976827,"Sequence number": 10552460, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 15871 + } + }, + { + "ph": "s", "id": 8, "pid": 2338710, "tid": 2338710, "ts": 6345937154480.985, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937154484.284, "dur": 1.005, + "args": { + "External id": 976828,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 15872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937154490.314, "dur": 4.204, + "args": { + "External id": 976829,"Sequence number": 10552461, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 15873 + } + }, + { + "ph": "s", "id": 7, "pid": 2338710, "tid": 2338710, "ts": 6345937154490.314, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937154493.224, "dur": 0.310, + "args": { + "External id": 976830,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 15874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338710, "tid": 2338710, + "ts": 6345937154495.798, "dur": 5.568, + "args": { + "External id": 976831,"Sequence number": 10552462, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "3"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 15875 + } + }, + { + "ph": "s", "id": 6, "pid": 2338710, "tid": 2338710, "ts": 6345937154495.798, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937154499.473, "dur": 0.892, + "args": { + "External id": 976832,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "12288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 15876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937154508.597, "dur": 5.250, + "args": { + "External id": 976833,"Sequence number": 10552463, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 15877 + } + }, + { + "ph": "s", "id": 5, "pid": 2338710, "tid": 2338710, "ts": 6345937154508.597, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937154511.304, "dur": 1.588, + "args": { + "External id": 976834,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "12288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 15878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338710, "tid": 2338710, + "ts": 6345937154518.457, "dur": 37.763, + "args": { + "External id": 976835,"Sequence number": 10552464, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 15879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338710, "tid": 2338710, + "ts": 6345937154522.844, "dur": 33.140, + "args": { + "External id": 976836,"Sequence number": 10552464, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 15880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345937154525.682, "dur": 9.209, + "args": { + "External id": 976837,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 15881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937154528.245, "dur": 5.972, + "args": { + "External id": 976838,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345937154535.849, "dur": 19.616, + "args": { + "External id": 976839,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 15883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937154587.230, "dur": 4.714, + "args": { + "External id": 976840,"Sequence number": 10552464, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 15884 + } + }, + { + "ph": "s", "id": 4, "pid": 2338710, "tid": 2338710, "ts": 6345937154587.230, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937154594.975, "dur": 1.609, + "args": { + "External id": 976841,"Sequence number": 10552465, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 15885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345937154636.950, "dur": 46787.870, + "args": { + "External id": 976842,"Sequence number": 10552465, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16384, 1], [1], [4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768], [32000, 4096], [], [], [], [], []], "Ev Idx": 15886 + } + }, + { + "ph": "s", "id": 3, "pid": 2338710, "tid": 2338710, "ts": 6345937154636.950, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338710, "tid": 2338710, + "ts": 6345937154656.566, "dur": 31.748, + "args": { + "External id": 976843,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 15887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338710, "tid": 2338710, + "ts": 6345937154657.334, "dur": 30.720, + "args": { + "External id": 976844,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 15888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345937154658.752, "dur": 7.867, + "args": { + "External id": 976845,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[16384, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 15889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937154660.497, "dur": 5.579, + "args": { + "External id": 976846,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345937154667.558, "dur": 20.095, + "args": { + "External id": 976847,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [16384, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 15891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338710, "tid": 2338710, + "ts": 6345937154706.914, "dur": 32.064, + "args": { + "External id": 976848,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 15892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345937154708.344, "dur": 6.388, + "args": { + "External id": 976849,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 15893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937154710.294, "dur": 4.111, + "args": { + "External id": 976850,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2338710, + "ts": 6345937154716.155, "dur": 22.570, + "args": { + "External id": 976851,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 15895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345937154720.511, "dur": 17.699, + "args": { + "External id": 976852,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 15896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338710, "tid": 2338710, + "ts": 6345937154743.119, "dur": 21.514, + "args": { + "External id": 976853,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 15897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345937154744.102, "dur": 4.176, + "args": { + "External id": 976854,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 15898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937154745.259, "dur": 2.706, + "args": { + "External id": 976855,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2338710, + "ts": 6345937154748.870, "dur": 15.524, + "args": { + "External id": 976856,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345937154749.561, "dur": 14.462, + "args": { + "External id": 976857,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 15901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338710, "tid": 2338710, + "ts": 6345937154771.946, "dur": 23.117, + "args": { + "External id": 976858,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 15902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937154773.706, "dur": 3.467, + "args": { + "External id": 976859,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2338710, + "ts": 6345937154778.056, "dur": 16.689, + "args": { + "External id": 976860,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[32768]], "Ev Idx": 15904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345937154781.280, "dur": 13.082, + "args": { + "External id": 976861,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338710, "tid": 2338710, + "ts": 6345937154800.531, "dur": 25.327, + "args": { + "External id": 976862,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345937154828.736, "dur": 57.558, + "args": { + "External id": 976863,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345937154836.107, "dur": 49.663, + "args": { + "External id": 976864,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937154842.605, "dur": 0.628, + "args": { + "External id": 976865,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 15909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345937154844.602, "dur": 23.700, + "args": { + "External id": 976866,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345937154845.980, "dur": 22.051, + "args": { + "External id": 976867,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[32768], [], [], [], [], [], []], "Ev Idx": 15911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937154848.332, "dur": 3.124, + "args": { + "External id": 976868,"Record function id": 0, "Concrete Inputs": ["[32768]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345937154852.423, "dur": 15.218, + "args": { + "External id": 976869,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[32768], [32768], []], "Ev Idx": 15913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338710, "tid": 2338710, + "ts": 6345937154890.368, "dur": 39788.039, + "args": { + "External id": 976870,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 15914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338710, "tid": 2338710, + "ts": 6345937154891.981, "dur": 39785.065, + "args": { + "External id": 976871,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 15915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937194693.693, "dur": 10.545, + "args": { + "External id": 976872,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937194699.767, "dur": 1.465, + "args": { + "External id": 976873,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345937194711.692, "dur": 131.656, + "args": { + "External id": 976874,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937194713.602, "dur": 8.699, + "args": { + "External id": 976875,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937194716.821, "dur": 3.040, + "args": { + "External id": 976876,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937194718.680, "dur": 0.855, + "args": { + "External id": 976877,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937194726.215, "dur": 116.117, + "args": { + "External id": 976878,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937194729.110, "dur": 112.457, + "args": { + "External id": 976879,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937194848.984, "dur": 6.010, + "args": { + "External id": 976880,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937194852.581, "dur": 0.720, + "args": { + "External id": 976881,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937194863.877, "dur": 3.944, + "args": { + "External id": 976882,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937194880.343, "dur": 11.542, + "args": { + "External id": 976883,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937194886.320, "dur": 5.168, + "args": { + "External id": 976884,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937195110.566, "dur": 316.809, + "args": { + "External id": 976885,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937195117.141, "dur": 4.693, + "args": { + "External id": 976886,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937195123.848, "dur": 302.648, + "args": { + "External id": 976887,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345937195126.781, "dur": 0.720, + "args": { + "External id": 976888,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345937195128.990, "dur": 37.658, + "args": { + "External id": 976889,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345937195169.708, "dur": 5.320, + "args": { + "External id": 976890,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937195173.647, "dur": 0.945, + "args": { + "External id": 976891,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937195177.315, "dur": 34.320, + "args": { + "External id": 976892,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937195181.099, "dur": 1.451, + "args": { + "External id": 976893,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937195185.221, "dur": 25.931, + "args": { + "External id": 976894,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937195189.050, "dur": 4.849, + "args": { + "External id": 976895,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345937195214.821, "dur": 27.566, + "args": { + "External id": 976896,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345937195291.588, "dur": 22.667, + "args": { + "External id": 976897,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345937195319.648, "dur": 21.158, + "args": { + "External id": 976898,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345937195342.529, "dur": 17.140, + "args": { + "External id": 976899,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345937195362.129, "dur": 30.806, + "args": { + "External id": 976900,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937195364.904, "dur": 2.918, + "args": { + "External id": 976901,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937195373.542, "dur": 0.720, + "args": { + "External id": 976902,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345937195394.799, "dur": 15.356, + "args": { + "External id": 976903,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937195411.473, "dur": 13.874, + "args": { + "External id": 976904,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937195436.125, "dur": 2.340, + "args": { + "External id": 976905,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937195446.829, "dur": 5.745, + "args": { + "External id": 976906,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937195451.045, "dur": 0.481, + "args": { + "External id": 976907,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937195542.470, "dur": 91.068, + "args": { + "External id": 976908,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937195642.374, "dur": 8.432, + "args": { + "External id": 976909,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937195646.876, "dur": 0.934, + "args": { + "External id": 976910,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345937195652.800, "dur": 32.143, + "args": { + "External id": 976911,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937195690.754, "dur": 8.119, + "args": { + "External id": 976912,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937195692.956, "dur": 5.143, + "args": { + "External id": 976913,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937195695.785, "dur": 2.053, + "args": { + "External id": 976914,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937195704.902, "dur": 56.314, + "args": { + "External id": 976915,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937195706.276, "dur": 54.154, + "args": { + "External id": 976916,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937195765.955, "dur": 20.338, + "args": { + "External id": 976917,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937195793.733, "dur": 5.419, + "args": { + "External id": 976918,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937195796.310, "dur": 1.797, + "args": { + "External id": 976919,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345937195804.201, "dur": 59.836, + "args": { + "External id": 976920,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937195805.176, "dur": 9.206, + "args": { + "External id": 976921,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937195808.984, "dur": 4.633, + "args": { + "External id": 976922,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937195810.305, "dur": 2.957, + "args": { + "External id": 976923,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937195815.141, "dur": 48.339, + "args": { + "External id": 976924,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937195815.942, "dur": 46.874, + "args": { + "External id": 976925,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937195868.851, "dur": 5.043, + "args": { + "External id": 976926,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937195871.279, "dur": 0.738, + "args": { + "External id": 976927,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937195882.224, "dur": 1.979, + "args": { + "External id": 976928,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937195895.474, "dur": 9.774, + "args": { + "External id": 976929,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937195897.732, "dur": 7.137, + "args": { + "External id": 976930,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937196033.703, "dur": 278.599, + "args": { + "External id": 976931,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937196037.418, "dur": 3.674, + "args": { + "External id": 976932,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937196042.891, "dur": 268.921, + "args": { + "External id": 976933,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345937196047.201, "dur": 0.493, + "args": { + "External id": 976934,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345937196048.823, "dur": 72.725, + "args": { + "External id": 976935,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345937196124.750, "dur": 3.591, + "args": { + "External id": 976936,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937196127.124, "dur": 0.899, + "args": { + "External id": 976937,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937196129.544, "dur": 28.404, + "args": { + "External id": 976938,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937196131.121, "dur": 2.337, + "args": { + "External id": 976939,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937196134.726, "dur": 22.881, + "args": { + "External id": 976940,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937196139.314, "dur": 2.935, + "args": { + "External id": 976941,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345937196160.024, "dur": 29.410, + "args": { + "External id": 976942,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345937196191.160, "dur": 15.941, + "args": { + "External id": 976943,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345937196213.422, "dur": 18.725, + "args": { + "External id": 976944,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345937196233.779, "dur": 15.746, + "args": { + "External id": 976945,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345937196251.857, "dur": 25.274, + "args": { + "External id": 976946,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937196254.724, "dur": 2.365, + "args": { + "External id": 976947,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937196259.118, "dur": 0.813, + "args": { + "External id": 976948,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345937196279.230, "dur": 15.797, + "args": { + "External id": 976949,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937196296.413, "dur": 14.219, + "args": { + "External id": 976950,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937196325.178, "dur": 2.370, + "args": { + "External id": 976951,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937196339.964, "dur": 5.494, + "args": { + "External id": 976952,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937196343.794, "dur": 0.612, + "args": { + "External id": 976953,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937196428.035, "dur": 75.931, + "args": { + "External id": 976954,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937196510.071, "dur": 5.239, + "args": { + "External id": 976955,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937196513.010, "dur": 0.919, + "args": { + "External id": 976956,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345937196516.985, "dur": 32.477, + "args": { + "External id": 976957,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 16001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937196554.716, "dur": 8.695, + "args": { + "External id": 976958,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 16002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937196556.706, "dur": 5.919, + "args": { + "External id": 976959,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 16003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937196561.257, "dur": 1.146, + "args": { + "External id": 976960,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 16004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937196566.267, "dur": 50.879, + "args": { + "External id": 976961,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937196567.761, "dur": 48.733, + "args": { + "External id": 976962,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937196633.206, "dur": 19.294, + "args": { + "External id": 976963,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 16007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937196659.784, "dur": 4.224, + "args": { + "External id": 976964,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937196662.212, "dur": 0.765, + "args": { + "External id": 976965,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345937196672.292, "dur": 57.518, + "args": { + "External id": 976966,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 16010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937196673.436, "dur": 5.155, + "args": { + "External id": 976967,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 16011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937196674.324, "dur": 3.498, + "args": { + "External id": 976968,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 16012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937196675.481, "dur": 2.043, + "args": { + "External id": 976969,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 16013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937196679.566, "dur": 49.618, + "args": { + "External id": 976970,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937196680.460, "dur": 48.161, + "args": { + "External id": 976971,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937196735.629, "dur": 4.528, + "args": { + "External id": 976972,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937196737.875, "dur": 0.681, + "args": { + "External id": 976973,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937196748.080, "dur": 1.666, + "args": { + "External id": 976974,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 16018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937196758.710, "dur": 8.738, + "args": { + "External id": 976975,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 16019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937196761.210, "dur": 5.910, + "args": { + "External id": 976976,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937196876.477, "dur": 295.368, + "args": { + "External id": 976977,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937196879.126, "dur": 2.496, + "args": { + "External id": 976978,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937196883.314, "dur": 287.954, + "args": { + "External id": 976979,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 16023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345937196886.880, "dur": 0.356, + "args": { + "External id": 976980,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345937196890.151, "dur": 34.062, + "args": { + "External id": 976981,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345937196926.110, "dur": 3.203, + "args": { + "External id": 976982,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 16026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937196928.147, "dur": 0.957, + "args": { + "External id": 976983,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 16027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937196930.395, "dur": 30.322, + "args": { + "External id": 976984,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937196931.437, "dur": 3.181, + "args": { + "External id": 976985,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937196935.859, "dur": 24.496, + "args": { + "External id": 976986,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 16030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937196938.473, "dur": 3.191, + "args": { + "External id": 976987,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345937196962.033, "dur": 22.394, + "args": { + "External id": 976988,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345937196985.927, "dur": 15.833, + "args": { + "External id": 976989,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345937197025.907, "dur": 21.681, + "args": { + "External id": 976990,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 16034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345937197050.007, "dur": 56.053, + "args": { + "External id": 976991,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345937197109.656, "dur": 27.030, + "args": { + "External id": 976992,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 16036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937197112.205, "dur": 2.716, + "args": { + "External id": 976993,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937197116.986, "dur": 0.841, + "args": { + "External id": 976994,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 16038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345937197138.832, "dur": 14.575, + "args": { + "External id": 976995,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937197157.062, "dur": 12.747, + "args": { + "External id": 976996,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937197181.670, "dur": 2.754, + "args": { + "External id": 976997,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937197196.064, "dur": 5.911, + "args": { + "External id": 976998,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937197199.232, "dur": 1.635, + "args": { + "External id": 976999,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937197286.426, "dur": 80.156, + "args": { + "External id": 977000,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 16044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937197372.742, "dur": 5.494, + "args": { + "External id": 977001,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937197376.205, "dur": 0.916, + "args": { + "External id": 977002,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345937197379.903, "dur": 34.759, + "args": { + "External id": 977003,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 16047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937197422.566, "dur": 7.785, + "args": { + "External id": 977004,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 16048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937197424.665, "dur": 4.831, + "args": { + "External id": 977005,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 16049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937197426.693, "dur": 2.592, + "args": { + "External id": 977006,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 16050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937197433.783, "dur": 50.533, + "args": { + "External id": 977007,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937197435.134, "dur": 48.414, + "args": { + "External id": 977008,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937197488.782, "dur": 18.200, + "args": { + "External id": 977009,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 16053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937197514.649, "dur": 6.043, + "args": { + "External id": 977010,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937197518.929, "dur": 0.573, + "args": { + "External id": 977011,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345937197525.416, "dur": 52.497, + "args": { + "External id": 977012,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 16056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937197526.568, "dur": 3.654, + "args": { + "External id": 977013,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 16057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937197527.723, "dur": 1.747, + "args": { + "External id": 977014,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 16058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937197528.831, "dur": 0.478, + "args": { + "External id": 977015,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 16059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937197531.009, "dur": 46.511, + "args": { + "External id": 977016,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937197534.489, "dur": 42.513, + "args": { + "External id": 977017,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937197582.851, "dur": 6.591, + "args": { + "External id": 977018,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937197585.124, "dur": 2.753, + "args": { + "External id": 977019,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937197596.155, "dur": 1.700, + "args": { + "External id": 977020,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 16064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937197613.804, "dur": 12.473, + "args": { + "External id": 977021,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 16065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937197618.758, "dur": 7.180, + "args": { + "External id": 977022,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937197727.120, "dur": 194.999, + "args": { + "External id": 977023,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937197729.543, "dur": 2.588, + "args": { + "External id": 977024,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937197733.842, "dur": 187.838, + "args": { + "External id": 977025,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 16069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345937197735.400, "dur": 0.407, + "args": { + "External id": 977026,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345937197737.328, "dur": 24.525, + "args": { + "External id": 977027,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345937197763.734, "dur": 3.341, + "args": { + "External id": 977028,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 16072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937197766.003, "dur": 0.813, + "args": { + "External id": 977029,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 16073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937197770.888, "dur": 23.814, + "args": { + "External id": 977030,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937197772.091, "dur": 1.663, + "args": { + "External id": 977031,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937197774.848, "dur": 19.515, + "args": { + "External id": 977032,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 16076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937197779.531, "dur": 2.401, + "args": { + "External id": 977033,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345937197796.271, "dur": 24.680, + "args": { + "External id": 977034,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345937197822.490, "dur": 13.427, + "args": { + "External id": 977035,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345937197838.956, "dur": 14.927, + "args": { + "External id": 977036,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 16080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345937197855.455, "dur": 12.808, + "args": { + "External id": 977037,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345937197870.252, "dur": 23.219, + "args": { + "External id": 977038,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 16082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937197872.362, "dur": 2.237, + "args": { + "External id": 977039,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937197878.783, "dur": 0.517, + "args": { + "External id": 977040,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 16084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345937197895.330, "dur": 12.640, + "args": { + "External id": 977041,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937197909.598, "dur": 10.938, + "args": { + "External id": 977042,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937197928.950, "dur": 1.720, + "args": { + "External id": 977043,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937197939.936, "dur": 4.178, + "args": { + "External id": 977044,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937197942.441, "dur": 0.640, + "args": { + "External id": 977045,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937198033.003, "dur": 121.461, + "args": { + "External id": 977046,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 16090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937198163.430, "dur": 9.593, + "args": { + "External id": 977047,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937198169.774, "dur": 1.603, + "args": { + "External id": 977048,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345937198174.774, "dur": 31.901, + "args": { + "External id": 977049,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 16093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937198212.644, "dur": 5.635, + "args": { + "External id": 977050,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 16094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937198214.357, "dur": 2.980, + "args": { + "External id": 977051,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 16095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937198216.247, "dur": 0.845, + "args": { + "External id": 977052,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 16096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937198222.177, "dur": 51.706, + "args": { + "External id": 977053,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937198226.100, "dur": 47.057, + "args": { + "External id": 977054,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937198278.183, "dur": 16.985, + "args": { + "External id": 977055,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 16099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937198302.231, "dur": 4.316, + "args": { + "External id": 977056,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937198304.707, "dur": 0.689, + "args": { + "External id": 977057,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345937198311.300, "dur": 53.863, + "args": { + "External id": 977058,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 16102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937198312.442, "dur": 8.175, + "args": { + "External id": 977059,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 16103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937198313.418, "dur": 6.404, + "args": { + "External id": 977060,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 16104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937198317.151, "dur": 2.420, + "args": { + "External id": 977061,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 16105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937198321.568, "dur": 43.079, + "args": { + "External id": 977062,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937198322.317, "dur": 41.627, + "args": { + "External id": 977063,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937198370.410, "dur": 4.246, + "args": { + "External id": 977064,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937198372.420, "dur": 0.410, + "args": { + "External id": 977065,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937198381.461, "dur": 2.040, + "args": { + "External id": 977066,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 16110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937198392.801, "dur": 9.370, + "args": { + "External id": 977067,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 16111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937198397.387, "dur": 4.458, + "args": { + "External id": 977068,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937198510.162, "dur": 207.864, + "args": { + "External id": 977069,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937198512.810, "dur": 3.981, + "args": { + "External id": 977070,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937198518.545, "dur": 198.918, + "args": { + "External id": 977071,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 16115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345937198520.084, "dur": 0.389, + "args": { + "External id": 977072,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345937198523.847, "dur": 24.339, + "args": { + "External id": 977073,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345937198549.887, "dur": 3.043, + "args": { + "External id": 977074,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 16118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937198552.009, "dur": 0.669, + "args": { + "External id": 977075,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 16119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937198556.297, "dur": 25.657, + "args": { + "External id": 977076,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937198557.424, "dur": 3.058, + "args": { + "External id": 977077,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937198561.696, "dur": 19.924, + "args": { + "External id": 977078,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 16122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937198564.365, "dur": 3.226, + "args": { + "External id": 977079,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345937198583.817, "dur": 22.002, + "args": { + "External id": 977080,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345937198607.586, "dur": 16.208, + "args": { + "External id": 977081,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345937198627.185, "dur": 15.522, + "args": { + "External id": 977082,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 16126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345937198644.661, "dur": 14.868, + "args": { + "External id": 977083,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345937198661.322, "dur": 25.385, + "args": { + "External id": 977084,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 16128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937198665.903, "dur": 1.691, + "args": { + "External id": 977085,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937198669.568, "dur": 0.703, + "args": { + "External id": 977086,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 16130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345937198688.644, "dur": 15.126, + "args": { + "External id": 977087,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937198704.863, "dur": 11.463, + "args": { + "External id": 977088,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937198725.360, "dur": 1.694, + "args": { + "External id": 977089,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937198736.719, "dur": 3.840, + "args": { + "External id": 977090,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937198739.192, "dur": 0.425, + "args": { + "External id": 977091,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937198815.627, "dur": 59.403, + "args": { + "External id": 977092,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 16136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937198880.499, "dur": 15.107, + "args": { + "External id": 977093,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937198891.607, "dur": 2.594, + "args": { + "External id": 977094,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345937198897.266, "dur": 29.926, + "args": { + "External id": 977095,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 16139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937198931.875, "dur": 8.132, + "args": { + "External id": 977096,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 16140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937198933.671, "dur": 5.411, + "args": { + "External id": 977097,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 16141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937198938.020, "dur": 0.837, + "args": { + "External id": 977098,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 16142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937198942.564, "dur": 47.703, + "args": { + "External id": 977099,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937198943.627, "dur": 45.901, + "args": { + "External id": 977100,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937198994.242, "dur": 35.472, + "args": { + "External id": 977101,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 16145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937199039.256, "dur": 5.017, + "args": { + "External id": 977102,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937199042.208, "dur": 0.817, + "args": { + "External id": 977103,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345937199051.457, "dur": 96.263, + "args": { + "External id": 977104,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 16148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937199052.467, "dur": 39.885, + "args": { + "External id": 977105,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 16149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937199053.717, "dur": 37.530, + "args": { + "External id": 977106,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 16150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937199055.115, "dur": 35.251, + "args": { + "External id": 977107,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 16151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937199094.105, "dur": 53.069, + "args": { + "External id": 977108,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937199095.131, "dur": 51.095, + "args": { + "External id": 977109,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937199155.828, "dur": 7.140, + "args": { + "External id": 977110,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937199158.495, "dur": 2.628, + "args": { + "External id": 977111,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937199170.580, "dur": 2.075, + "args": { + "External id": 977112,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 16156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937199182.052, "dur": 8.188, + "args": { + "External id": 977113,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 16157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937199184.183, "dur": 5.702, + "args": { + "External id": 977114,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937199292.709, "dur": 201.498, + "args": { + "External id": 977115,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937199295.246, "dur": 4.133, + "args": { + "External id": 977116,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937199301.007, "dur": 192.616, + "args": { + "External id": 977117,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 16161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345937199302.292, "dur": 0.551, + "args": { + "External id": 977118,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345937199304.321, "dur": 25.487, + "args": { + "External id": 977119,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345937199331.580, "dur": 5.183, + "args": { + "External id": 977120,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 16164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937199335.769, "dur": 0.732, + "args": { + "External id": 977121,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 16165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937199338.094, "dur": 24.763, + "args": { + "External id": 977122,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937199339.481, "dur": 1.371, + "args": { + "External id": 977123,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937199342.007, "dur": 20.520, + "args": { + "External id": 977124,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 16168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937199347.221, "dur": 2.748, + "args": { + "External id": 977125,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345937199364.783, "dur": 24.798, + "args": { + "External id": 977126,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345937199391.332, "dur": 13.828, + "args": { + "External id": 977127,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345937199407.975, "dur": 15.593, + "args": { + "External id": 977128,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 16172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345937199425.284, "dur": 13.021, + "args": { + "External id": 977129,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345937199440.167, "dur": 23.176, + "args": { + "External id": 977130,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 16174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937199442.448, "dur": 2.140, + "args": { + "External id": 977131,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937199446.436, "dur": 2.481, + "args": { + "External id": 977132,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 16176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345937199467.900, "dur": 11.975, + "args": { + "External id": 977133,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937199481.041, "dur": 11.301, + "args": { + "External id": 977134,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937199501.531, "dur": 2.150, + "args": { + "External id": 977135,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937199513.590, "dur": 3.701, + "args": { + "External id": 977136,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937199515.908, "dur": 0.409, + "args": { + "External id": 977137,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937199585.575, "dur": 58.730, + "args": { + "External id": 977138,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 16182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937199650.450, "dur": 4.718, + "args": { + "External id": 977139,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937199653.331, "dur": 0.651, + "args": { + "External id": 977140,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345937199656.543, "dur": 27.768, + "args": { + "External id": 977141,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 16185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937199691.409, "dur": 6.023, + "args": { + "External id": 977142,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 16186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937199693.053, "dur": 3.589, + "args": { + "External id": 977143,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 16187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937199695.195, "dur": 1.214, + "args": { + "External id": 977144,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 16188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937199700.346, "dur": 44.429, + "args": { + "External id": 977145,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937199701.389, "dur": 42.572, + "args": { + "External id": 977146,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937199749.034, "dur": 17.514, + "args": { + "External id": 977147,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 16191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937199772.722, "dur": 6.190, + "args": { + "External id": 977148,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937199777.129, "dur": 0.677, + "args": { + "External id": 977149,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345937199783.068, "dur": 51.211, + "args": { + "External id": 977150,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 16194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937199784.063, "dur": 4.146, + "args": { + "External id": 977151,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 16195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937199784.919, "dur": 2.696, + "args": { + "External id": 977152,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 16196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937199786.584, "dur": 0.875, + "args": { + "External id": 977153,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 16197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937199788.809, "dur": 45.036, + "args": { + "External id": 977154,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937199792.044, "dur": 41.129, + "args": { + "External id": 977155,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937199839.279, "dur": 4.302, + "args": { + "External id": 977156,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937199841.311, "dur": 0.627, + "args": { + "External id": 977157,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937199848.589, "dur": 1.533, + "args": { + "External id": 977158,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 16202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937199858.733, "dur": 8.878, + "args": { + "External id": 977159,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 16203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937199860.863, "dur": 6.464, + "args": { + "External id": 977160,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937199957.789, "dur": 275.815, + "args": { + "External id": 977161,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937199961.674, "dur": 2.275, + "args": { + "External id": 977162,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937199965.231, "dur": 267.599, + "args": { + "External id": 977163,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 16207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345937199966.479, "dur": 0.310, + "args": { + "External id": 977164,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345937199967.912, "dur": 22.709, + "args": { + "External id": 977165,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345937199992.069, "dur": 4.607, + "args": { + "External id": 977166,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 16210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937199995.607, "dur": 0.851, + "args": { + "External id": 977167,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 16211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937199997.543, "dur": 47.196, + "args": { + "External id": 977168,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937199998.893, "dur": 1.297, + "args": { + "External id": 977169,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937200001.353, "dur": 43.027, + "args": { + "External id": 977170,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 16214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937200006.475, "dur": 21.657, + "args": { + "External id": 977171,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345937200046.942, "dur": 63.424, + "args": { + "External id": 977172,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345937200113.272, "dur": 22.186, + "args": { + "External id": 977173,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345937200138.865, "dur": 16.659, + "args": { + "External id": 977174,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 16218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345937200157.212, "dur": 15.303, + "args": { + "External id": 977175,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345937200174.893, "dur": 23.783, + "args": { + "External id": 977176,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 16220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937200177.197, "dur": 1.926, + "args": { + "External id": 977177,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937200181.176, "dur": 0.936, + "args": { + "External id": 977178,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 16222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345937200202.746, "dur": 15.210, + "args": { + "External id": 977179,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937200219.591, "dur": 11.946, + "args": { + "External id": 977180,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937200243.086, "dur": 2.844, + "args": { + "External id": 977181,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937200256.588, "dur": 4.044, + "args": { + "External id": 977182,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937200259.184, "dur": 0.445, + "args": { + "External id": 977183,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937200342.734, "dur": 72.849, + "args": { + "External id": 977184,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 16228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937200421.859, "dur": 8.088, + "args": { + "External id": 977185,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937200427.726, "dur": 0.963, + "args": { + "External id": 977186,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345937200431.762, "dur": 31.304, + "args": { + "External id": 977187,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 16231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937200467.984, "dur": 6.407, + "args": { + "External id": 977188,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 16232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937200469.831, "dur": 3.578, + "args": { + "External id": 977189,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 16233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937200471.735, "dur": 1.391, + "args": { + "External id": 977190,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 16234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937200477.320, "dur": 49.709, + "args": { + "External id": 977191,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937200478.416, "dur": 48.033, + "args": { + "External id": 977192,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937200533.983, "dur": 15.325, + "args": { + "External id": 977193,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 16237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937200556.116, "dur": 3.947, + "args": { + "External id": 977194,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937200558.438, "dur": 0.673, + "args": { + "External id": 977195,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345937200564.552, "dur": 51.617, + "args": { + "External id": 977196,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 16240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937200565.631, "dur": 5.993, + "args": { + "External id": 977197,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 16241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937200566.551, "dur": 4.382, + "args": { + "External id": 977198,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 16242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937200570.316, "dur": 0.446, + "args": { + "External id": 977199,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 16243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937200572.295, "dur": 43.408, + "args": { + "External id": 977200,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937200573.141, "dur": 42.120, + "args": { + "External id": 977201,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937200621.157, "dur": 6.534, + "args": { + "External id": 977202,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937200623.239, "dur": 2.725, + "args": { + "External id": 977203,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937200634.231, "dur": 1.755, + "args": { + "External id": 977204,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 16248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937200647.711, "dur": 8.587, + "args": { + "External id": 977205,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 16249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937200650.019, "dur": 5.902, + "args": { + "External id": 977206,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937200755.697, "dur": 187.322, + "args": { + "External id": 977207,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937200757.721, "dur": 2.216, + "args": { + "External id": 977208,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345937200761.413, "dur": 181.176, + "args": { + "External id": 977209,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 16253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345937200762.938, "dur": 0.444, + "args": { + "External id": 977210,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345937200764.666, "dur": 23.678, + "args": { + "External id": 977211,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345937200792.717, "dur": 3.230, + "args": { + "External id": 977212,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 16256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937200794.851, "dur": 0.813, + "args": { + "External id": 977213,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 16257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937200796.930, "dur": 23.838, + "args": { + "External id": 977214,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345937200798.448, "dur": 1.716, + "args": { + "External id": 977215,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345937200801.145, "dur": 19.334, + "args": { + "External id": 977216,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 16260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937200805.439, "dur": 2.603, + "args": { + "External id": 977217,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345937200822.082, "dur": 22.286, + "args": { + "External id": 977218,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345937200846.048, "dur": 13.304, + "args": { + "External id": 977219,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345937200862.185, "dur": 14.598, + "args": { + "External id": 977220,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 16264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345937200878.251, "dur": 12.480, + "args": { + "External id": 977221,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345937200892.495, "dur": 23.158, + "args": { + "External id": 977222,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 16266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345937200897.162, "dur": 1.897, + "args": { + "External id": 977223,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937200900.878, "dur": 0.747, + "args": { + "External id": 977224,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 16268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345937200917.224, "dur": 12.393, + "args": { + "External id": 977225,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937200930.738, "dur": 10.576, + "args": { + "External id": 977226,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345937200950.479, "dur": 1.585, + "args": { + "External id": 977227,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937200960.908, "dur": 3.652, + "args": { + "External id": 977228,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937200963.186, "dur": 0.404, + "args": { + "External id": 977229,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937201094.529, "dur": 75.962, + "args": { + "External id": 977230,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 16274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345937201177.726, "dur": 6.818, + "args": { + "External id": 977231,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937201181.505, "dur": 1.083, + "args": { + "External id": 977232,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345937201186.394, "dur": 31.596, + "args": { + "External id": 977233,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 16277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345937201223.438, "dur": 7.749, + "args": { + "External id": 977234,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 16278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345937201224.981, "dur": 5.293, + "args": { + "External id": 977235,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 16279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937201229.228, "dur": 0.845, + "args": { + "External id": 977236,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 16280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345937201234.837, "dur": 48.284, + "args": { + "External id": 977237,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345937201235.904, "dur": 46.545, + "args": { + "External id": 977238,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937201287.686, "dur": 18.087, + "args": { + "External id": 977239,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 16283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345937201310.447, "dur": 28.452, + "args": { + "External id": 977240,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 16284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345937201313.164, "dur": 25.261, + "args": { + "External id": 977241,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937201320.346, "dur": 0.694, + "args": { + "External id": 977242,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 16286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345937201344.817, "dur": 30.637, + "args": { + "External id": 977243,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 16287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345937201346.932, "dur": 28.268, + "args": { + "External id": 977244,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], [], []], "Ev Idx": 16288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937201354.307, "dur": 4.258, + "args": { + "External id": 977245,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345937201359.897, "dur": 14.719, + "args": { + "External id": 977246,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 16290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2338710, + "ts": 6345937201388.318, "dur": 6.896, + "args": { + "External id": 977247,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 16291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2338710, + "ts": 6345937201390.643, "dur": 4.301, + "args": { + "External id": 977248,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 16292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2338710, + "ts": 6345937201396.347, "dur": 1.387, + "args": { + "External id": 977249,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 16293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2338710, + "ts": 6345937201396.995, "dur": 0.648, + "args": { + "External id": 977250,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 16294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937201445.522, "dur": 27.066, + "args": { + "External id": 977251,"Sequence number": 10552466, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 16295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345937201477.098, "dur": 14.372, + "args": { + "External id": 977252,"Sequence number": 10552467, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 16296 + } + }, + { + "ph": "s", "id": 2, "pid": 2338710, "tid": 2338710, "ts": 6345937201477.098, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345937201615.915, "dur": 47.241, + "args": { + "External id": 977253,"Record function id": 0, "Ev Idx": 16297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2338710, "tid": 2338710, + "ts": 6345937201778.815, "dur": 35.680, + "args": { + "External id": 977254,"Sequence number": 10552468, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 16298 + } + }, + { + "ph": "s", "id": 1, "pid": 2338710, "tid": 2338710, "ts": 6345937201778.815, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ones_like", "pid": 2338710, "tid": 2338710, + "ts": 6345937201897.887, "dur": 31.987, + "args": { + "External id": 977255,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "1"], "Input type": ["float", "", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[1], [], [], [], [], []], "Ev Idx": 16299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345937201899.930, "dur": 9.726, + "args": { + "External id": 977256,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "1"], "Input type": ["float", "", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[1], [], [], [], [], []], "Ev Idx": 16300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345937201903.791, "dur": 5.228, + "args": { + "External id": 977257,"Record function id": 0, "Concrete Inputs": ["[1]", "[1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345937201911.602, "dur": 17.876, + "args": { + "External id": 977258,"Record function id": 0, "Concrete Inputs": ["", "1."], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 16302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2338710, "tid": 2338710, + "ts": 6345939291195.781, "dur": 89.119, + "args": { + "External id": 977259,"Sequence number": 10552469, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 16303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2338710, "tid": 2338710, + "ts": 6345939291296.242, "dur": 22.640, + "args": { + "External id": 977260,"Sequence number": 10552470, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 16304 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "enumerate(DataLoader)#_StatefulMultiProcessingDataLoaderIter.__next__", "pid": 2338710, "tid": 2338710, + "ts": 6345939291358.365, "dur": 145.311, + "args": { + "External id": 977261,"Record function id": 0, "Ev Idx": 16305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345939291901.271, "dur": 18.114, + "args": { + "External id": 977262,"Sequence number": 10552471, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], []], "Ev Idx": 16306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939291912.386, "dur": 2.911, + "args": { + "External id": 977263,"Record function id": 0, "Concrete Inputs": ["", "[8, 8192]", "[8192, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 8192], [], [], []], "Ev Idx": 16307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345939291921.546, "dur": 512.198, + "args": { + "External id": 977264,"Sequence number": 10552471, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], []], "Ev Idx": 16308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939292425.102, "dur": 4.079, + "args": { + "External id": 977265,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 8192], [], [], []], "Ev Idx": 16309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345939292465.683, "dur": 16436.245, + "args": { + "External id": 977266,"Sequence number": 10552471, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], [], [], []], "Ev Idx": 16310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939292472.126, "dur": 16428.633, + "args": { + "External id": 977267,"Sequence number": 10552471, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], [], []], "Ev Idx": 16311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939292481.434, "dur": 12.198, + "args": { + "External id": 977268,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "[4096, 1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939292495.749, "dur": 16402.857, + "args": { + "External id": 977269,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 16313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345939292506.343, "dur": 0.396, + "args": { + "External id": 977270,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 16314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand_as", "pid": 2338710, "tid": 2338710, + "ts": 6345939292511.653, "dur": 9.380, + "args": { + "External id": 977271,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["long int", "long int"], "Input Strides": [[8192, 1], [4096, 1]], "Input Dims": [[8, 4096], [8, 4096]], "Ev Idx": 16315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 2338710, "tid": 2338710, + "ts": 6345939292514.370, "dur": 6.436, + "args": { + "External id": 977272,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], []], "Input Dims": [[8, 4096], [], []], "Ev Idx": 16316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939292519.525, "dur": 1.002, + "args": { + "External id": 977273,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 16317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338710, "tid": 2338710, + "ts": 6345939292523.035, "dur": 141.811, + "args": { + "External id": 977274,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 16318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338710, "tid": 2338710, + "ts": 6345939292525.186, "dur": 139.297, + "args": { + "External id": 977275,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 16319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345939292527.237, "dur": 8.113, + "args": { + "External id": 977276,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 16320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939292530.270, "dur": 4.442, + "args": { + "External id": 977277,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939292538.273, "dur": 125.780, + "args": { + "External id": 977278,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 16322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939292667.851, "dur": 16224.193, + "args": { + "External id": 977279,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 16323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345939308923.643, "dur": 392.999, + "args": { + "External id": 977280,"Sequence number": 10552471, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], [], [], [], []], "Ev Idx": 16324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939308926.220, "dur": 389.684, + "args": { + "External id": 977281,"Sequence number": 10552471, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], [], [], []], "Ev Idx": 16325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939308933.208, "dur": 10.715, + "args": { + "External id": 977282,"Record function id": 0, "Concrete Inputs": ["[8, 8192]", "[8192, 1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939308945.571, "dur": 364.557, + "args": { + "External id": 977283,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[8192, 1], [8192, 1], []], "Input Dims": [[8, 8192], [8, 8192], []], "Ev Idx": 16327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 2338710, "tid": 2338710, + "ts": 6345939309364.284, "dur": 85.536, + "args": { + "External id": 977284,"Record function id": 0, "Concrete Inputs": ["0", "4096", "", "", "", "False"], "Input type": ["Scalar", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939309373.429, "dur": 8.469, + "args": { + "External id": 977285,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 2338710, "tid": 2338710, + "ts": 6345939309387.576, "dur": 61.580, + "args": { + "External id": 977286,"Record function id": 0, "Concrete Inputs": ["0", "4096", "1", ""], "Input type": ["Scalar", "Scalar", "Scalar", "long int"], "Input Strides": [[], [], [], [1]], "Input Dims": [[], [], [], [0]], "Ev Idx": 16330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345939309397.010, "dur": 10.155, + "args": { + "External id": 977287,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["long int", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::repeat", "pid": 2338710, "tid": 2338710, + "ts": 6345939309467.528, "dur": 120.387, + "args": { + "External id": 977288,"Sequence number": 10552471, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 2338710, "tid": 2338710, + "ts": 6345939309475.165, "dur": 13.046, + "args": { + "External id": 977289,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[4096], [], []], "Ev Idx": 16333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939309483.838, "dur": 3.837, + "args": { + "External id": 977290,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096]", "[4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 16334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939309490.589, "dur": 6.009, + "args": { + "External id": 977291,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338710, "tid": 2338710, + "ts": 6345939309500.265, "dur": 3.246, + "args": { + "External id": 977292,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[4096, 1]], "Input Dims": [[8, 4096]], "Ev Idx": 16336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 2338710, "tid": 2338710, + "ts": 6345939309508.222, "dur": 12.969, + "args": { + "External id": 977293,"Record function id": 0, "Concrete Inputs": ["", "0", "1", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 16337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939309515.931, "dur": 4.941, + "args": { + "External id": 977294,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1]", "[4096, 1, 4096]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 16338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 2338710, "tid": 2338710, + "ts": 6345939309522.880, "dur": 6.135, + "args": { + "External id": 977295,"Record function id": 0, "Concrete Inputs": ["", "1", "4096", "4096"], "Input type": ["long int", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 4096], [], [], []], "Input Dims": [[8, 4096, 1], [], [], []], "Ev Idx": 16339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939309527.500, "dur": 1.291, + "args": { + "External id": 977296,"Record function id": 0, "Concrete Inputs": ["", "[8, 1, 1, 4096]", "[4096, 4096, 4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1, 4096], [], [], []], "Input Dims": [[8, 4096, 1], [], [], []], "Ev Idx": 16340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand_as", "pid": 2338710, "tid": 2338710, + "ts": 6345939309532.867, "dur": 7.791, + "args": { + "External id": 977297,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["long int", "long int"], "Input Strides": [[4096, 1], [4096, 4096, 4096, 1]], "Input Dims": [[1, 4096], [8, 1, 1, 4096]], "Ev Idx": 16341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 2338710, "tid": 2338710, + "ts": 6345939309535.098, "dur": 5.274, + "args": { + "External id": 977298,"Record function id": 0, "Concrete Inputs": ["", "[8, 1, 1, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1, 4096], [], []], "Ev Idx": 16342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939309538.756, "dur": 1.361, + "args": { + "External id": 977299,"Record function id": 0, "Concrete Inputs": ["", "[8, 1, 1, 4096]", "[0, 4096, 4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 16343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939309543.471, "dur": 43.254, + "args": { + "External id": 977300,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 4096, 4096, 1], [0, 4096, 4096, 1], []], "Input Dims": [[8, 1, 1, 4096], [8, 1, 1, 4096], []], "Ev Idx": 16344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345939309603.389, "dur": 47.147, + "args": { + "External id": 977301,"Sequence number": 10552471, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "3", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 16345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939309606.993, "dur": 43.279, + "args": { + "External id": 977302,"Sequence number": 10552471, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "3", "", "", "", "False", ""], "Input type": ["long int", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], [], []], "Ev Idx": 16346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939309612.237, "dur": 6.735, + "args": { + "External id": 977303,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "[4096, 1]", "3", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939309621.595, "dur": 27.829, + "args": { + "External id": 977304,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["int", "long int", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 16348 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::root_pre_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345939309817.956, "dur": 321.543, + "args": { + "External id": 977305,"Record function id": 0, "Ev Idx": 16349 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::inputs_to_device", "pid": 2338710, "tid": 2338710, + "ts": 6345939309943.980, "dur": 175.387, + "args": { + "External id": 977306,"Record function id": 0, "Ev Idx": 16350 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345939310151.342, "dur": 66.856, + "args": { + "External id": 977307,"Record function id": 0, "Ev Idx": 16351 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345939310231.409, "dur": 14413.039, + "args": { + "External id": 977308,"Record function id": 0, "Ev Idx": 16352 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather", "pid": 2338710, "tid": 2338710, + "ts": 6345939310243.490, "dur": 2309.506, + "args": { + "External id": 977309,"Record function id": 0, "Ev Idx": 16353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939310417.402, "dur": 17.552, + "args": { + "External id": 977310,"Record function id": 0, "Concrete Inputs": ["[141824512]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345939310462.155, "dur": 195.349, + "args": { + "External id": 977311,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["c10::BFloat16", "", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[141824512], [], []], "Ev Idx": 16355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939310471.669, "dur": 3.885, + "args": { + "External id": 977312,"Record function id": 0, "Concrete Inputs": ["", "[16384000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939310481.191, "dur": 0.999, + "args": { + "External id": 977313,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "16384000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939310484.563, "dur": 0.881, + "args": { + "External id": 977314,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "16384512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939310488.620, "dur": 5.329, + "args": { + "External id": 977315,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "18481664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939310495.907, "dur": 0.678, + "args": { + "External id": 977316,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19005952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939310499.835, "dur": 0.767, + "args": { + "External id": 977317,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "19530240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939310505.308, "dur": 0.786, + "args": { + "External id": 977318,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "21627392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939310508.059, "dur": 0.713, + "args": { + "External id": 977319,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "21627904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939310510.926, "dur": 0.883, + "args": { + "External id": 977320,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "28967936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939310516.172, "dur": 0.688, + "args": { + "External id": 977321,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "36307968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939310518.713, "dur": 0.600, + "args": { + "External id": 977322,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "43648000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939310521.993, "dur": 6.911, + "args": { + "External id": 977323,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "43648512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939310531.221, "dur": 0.507, + "args": { + "External id": 977324,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45745664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939310535.053, "dur": 0.602, + "args": { + "External id": 977325,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46269952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939310540.504, "dur": 0.444, + "args": { + "External id": 977326,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "46794240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939310543.018, "dur": 0.593, + "args": { + "External id": 977327,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "48891392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939310546.010, "dur": 0.888, + "args": { + "External id": 977328,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "48891904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939310550.852, "dur": 0.658, + "args": { + "External id": 977329,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "56231936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939310553.365, "dur": 0.741, + "args": { + "External id": 977330,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "63571968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939310556.568, "dur": 4.226, + "args": { + "External id": 977331,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "70912000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939310562.985, "dur": 0.393, + "args": { + "External id": 977332,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "70912512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939310565.168, "dur": 0.416, + "args": { + "External id": 977333,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "73009664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939310569.474, "dur": 0.612, + "args": { + "External id": 977334,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "73533952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939310571.903, "dur": 0.945, + "args": { + "External id": 977335,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "74058240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939310574.886, "dur": 0.940, + "args": { + "External id": 977336,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "76155392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939310579.706, "dur": 0.424, + "args": { + "External id": 977337,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "76155904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939310581.902, "dur": 0.697, + "args": { + "External id": 977338,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "83495936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939310584.995, "dur": 4.463, + "args": { + "External id": 977339,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "90835968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939310591.329, "dur": 0.620, + "args": { + "External id": 977340,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "98176000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939310593.940, "dur": 0.637, + "args": { + "External id": 977341,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "98176512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939310599.141, "dur": 0.755, + "args": { + "External id": 977342,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "100273664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939310601.605, "dur": 0.598, + "args": { + "External id": 977343,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "100797952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939310604.099, "dur": 0.792, + "args": { + "External id": 977344,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "101322240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939310608.968, "dur": 0.639, + "args": { + "External id": 977345,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "103419392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939310611.349, "dur": 0.748, + "args": { + "External id": 977346,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "103419904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939310618.995, "dur": 6.309, + "args": { + "External id": 977347,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "110759936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939310627.361, "dur": 0.434, + "args": { + "External id": 977348,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "118099968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939310631.113, "dur": 0.622, + "args": { + "External id": 977349,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "125440000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939310635.553, "dur": 0.585, + "args": { + "External id": 977350,"Record function id": 0, "Concrete Inputs": ["", "[16384000]", "[1]", "125440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939310703.860, "dur": 214.031, + "args": { + "External id": 977351,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 16395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345939311168.872, "dur": 528.911, + "args": { + "External id": 977352,"Record function id": 0, "Concrete Inputs": ["", "", "141824512", "8", "4", "15", ""], "Input type": ["TensorList", "", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 16396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939311198.563, "dur": 17.121, + "args": { + "External id": 977353,"Record function id": 0, "Concrete Inputs": ["[1134596096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345939311224.983, "dur": 20.144, + "args": { + "External id": 977354,"Record function id": 0, "Concrete Inputs": ["", "0", "567298048", "141824512"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1134596096], [], [], []], "Ev Idx": 16398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345939311231.150, "dur": 13.036, + "args": { + "External id": 977355,"Record function id": 0, "Concrete Inputs": ["", "0", "567298048", "709122560", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[1134596096], [], [], [], []], "Ev Idx": 16399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939311237.124, "dur": 1.807, + "args": { + "External id": 977356,"Record function id": 0, "Concrete Inputs": ["", "[141824512]", "[1]", "567298048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1134596096], [], [], []], "Ev Idx": 16400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345939311262.856, "dur": 174.033, + "args": { + "External id": 977357,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["c10::BFloat16", "", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[141824512], [], []], "Ev Idx": 16401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939311267.803, "dur": 0.630, + "args": { + "External id": 977358,"Record function id": 0, "Concrete Inputs": ["", "[16384000]", "[1]", "567298048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939311270.748, "dur": 3.650, + "args": { + "External id": 977359,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "583682048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939311276.434, "dur": 3.408, + "args": { + "External id": 977360,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "583682560"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939311282.316, "dur": 0.748, + "args": { + "External id": 977361,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "585779712"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939311287.574, "dur": 0.703, + "args": { + "External id": 977362,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "586304000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939311290.549, "dur": 0.481, + "args": { + "External id": 977363,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "586828288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939311293.060, "dur": 0.851, + "args": { + "External id": 977364,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "588925440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939311298.501, "dur": 0.735, + "args": { + "External id": 977365,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "588925952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939311300.993, "dur": 0.796, + "args": { + "External id": 977366,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "596265984"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939311304.126, "dur": 2.593, + "args": { + "External id": 977367,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "603606016"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939311308.518, "dur": 3.114, + "args": { + "External id": 977368,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "610946048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939311313.666, "dur": 0.782, + "args": { + "External id": 977369,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "610946560"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939311319.492, "dur": 0.785, + "args": { + "External id": 977370,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "613043712"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939311322.553, "dur": 0.746, + "args": { + "External id": 977371,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "613568000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939311325.029, "dur": 0.684, + "args": { + "External id": 977372,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "614092288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939311329.736, "dur": 0.719, + "args": { + "External id": 977373,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "616189440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939311332.658, "dur": 0.401, + "args": { + "External id": 977374,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "616189952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939311335.097, "dur": 3.336, + "args": { + "External id": 977375,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "623529984"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939311340.325, "dur": 3.736, + "args": { + "External id": 977376,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "630870016"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939311346.270, "dur": 0.927, + "args": { + "External id": 977377,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "638210048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939311351.408, "dur": 0.427, + "args": { + "External id": 977378,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "638210560"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939311354.043, "dur": 0.529, + "args": { + "External id": 977379,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "640307712"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939311356.929, "dur": 0.531, + "args": { + "External id": 977380,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "640832000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939311361.987, "dur": 0.817, + "args": { + "External id": 977381,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "641356288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939311364.611, "dur": 0.515, + "args": { + "External id": 977382,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "643453440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939311367.442, "dur": 3.247, + "args": { + "External id": 977383,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "643453952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939311372.529, "dur": 2.860, + "args": { + "External id": 977384,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "650793984"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939311377.533, "dur": 0.742, + "args": { + "External id": 977385,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "658134016"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939311382.281, "dur": 0.756, + "args": { + "External id": 977386,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "665474048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939311385.384, "dur": 0.584, + "args": { + "External id": 977387,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "665474560"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939311387.517, "dur": 0.900, + "args": { + "External id": 977388,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "667571712"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939311393.033, "dur": 0.486, + "args": { + "External id": 977389,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "668096000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939311395.304, "dur": 0.578, + "args": { + "External id": 977390,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "668620288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939311397.766, "dur": 1.871, + "args": { + "External id": 977391,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "670717440"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939311401.567, "dur": 3.341, + "args": { + "External id": 977392,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "670717952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939311407.085, "dur": 0.649, + "args": { + "External id": 977393,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "678057984"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939311411.217, "dur": 0.621, + "args": { + "External id": 977394,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "685398016"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939311413.721, "dur": 0.736, + "args": { + "External id": 977395,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "692738048"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939311416.678, "dur": 0.713, + "args": { + "External id": 977396,"Record function id": 0, "Concrete Inputs": ["", "[16384000]", "[1]", "692738560"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939311477.197, "dur": 190.970, + "args": { + "External id": 977397,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 16441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345939311805.705, "dur": 557.082, + "args": { + "External id": 977398,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[1134596096], [141824512], [], [], []], "Ev Idx": 16442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345939311852.204, "dur": 500.396, + "args": { + "External id": 977399,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 1134596096, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[141824512], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 16443, "In msg nelems": 141824512 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345939311868.281, "dur": 472.024, + "args": { + "External id": 977400,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[141824512]], "Ev Idx": 16444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345939312403.607, "dur": 3.866, + "args": { + "External id": 977401,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 16445, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out", "pid": 2338710, "tid": 2338710, + "ts": 6345939312577.538, "dur": 11771.319, + "args": { + "External id": 977402,"Record function id": 0, "Ev Idx": 16446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939312889.387, "dur": 9.514, + "args": { + "External id": 977403,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1134596096], []], "Ev Idx": 16447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939312905.364, "dur": 1.875, + "args": { + "External id": 977404,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[131072000], []], "Ev Idx": 16448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939312910.513, "dur": 2.155, + "args": { + "External id": 977405,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939312918.605, "dur": 3.897, + "args": { + "External id": 977406,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939312925.096, "dur": 1.375, + "args": { + "External id": 977407,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939312929.410, "dur": 1.320, + "args": { + "External id": 977408,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939312967.706, "dur": 1.853, + "args": { + "External id": 983553,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939312975.216, "dur": 2.419, + "args": { + "External id": 983554,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939312980.562, "dur": 1.135, + "args": { + "External id": 983555,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939312984.889, "dur": 1.022, + "args": { + "External id": 983556,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939312988.967, "dur": 2.025, + "args": { + "External id": 983557,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939312997.366, "dur": 3.648, + "args": { + "External id": 983558,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939313003.807, "dur": 1.058, + "args": { + "External id": 983559,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939313041.318, "dur": 3.551, + "args": { + "External id": 983560,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939313050.315, "dur": 1.202, + "args": { + "External id": 983561,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939313114.227, "dur": 5.422, + "args": { + "External id": 983562,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939313124.957, "dur": 1.016, + "args": { + "External id": 983563,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939313128.608, "dur": 1.004, + "args": { + "External id": 983564,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939313133.154, "dur": 1.222, + "args": { + "External id": 983565,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939313139.691, "dur": 4.483, + "args": { + "External id": 983566,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939313146.949, "dur": 1.006, + "args": { + "External id": 983567,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939313153.782, "dur": 1.113, + "args": { + "External id": 983568,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939313157.448, "dur": 1.425, + "args": { + "External id": 983569,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939313164.059, "dur": 5.152, + "args": { + "External id": 983570,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939313172.898, "dur": 1.271, + "args": { + "External id": 983571,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939313176.958, "dur": 1.154, + "args": { + "External id": 983572,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939313181.807, "dur": 1.368, + "args": { + "External id": 983573,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939313188.415, "dur": 3.894, + "args": { + "External id": 983574,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939313196.076, "dur": 1.307, + "args": { + "External id": 983575,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939313200.579, "dur": 1.280, + "args": { + "External id": 983576,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939313205.065, "dur": 0.962, + "args": { + "External id": 983577,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939313210.895, "dur": 2.577, + "args": { + "External id": 983578,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939313221.220, "dur": 1.154, + "args": { + "External id": 983579,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939313225.139, "dur": 2.107, + "args": { + "External id": 983580,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939313232.324, "dur": 1.267, + "args": { + "External id": 983581,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939313238.785, "dur": 3.377, + "args": { + "External id": 983582,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939313244.889, "dur": 1.035, + "args": { + "External id": 983583,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939313248.184, "dur": 1.671, + "args": { + "External id": 983584,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939313252.563, "dur": 1.185, + "args": { + "External id": 983585,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939313259.189, "dur": 2.422, + "args": { + "External id": 983586,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[131072000], []], "Ev Idx": 16486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939313309.763, "dur": 10927.772, + "args": { + "External id": 983587,"Record function id": 0, "Concrete Inputs": ["", "", "1", ""], "Input type": ["c10::BFloat16", "", "Scalar", "TensorList"], "Input Strides": [[141824512, 1], [], [], []], "Input Dims": [[8, 141824512], [], [], []], "Ev Idx": 16487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939313346.472, "dur": 10871.259, + "args": { + "External id": 983588,"Record function id": 0, "Concrete Inputs": ["", "", "1", ""], "Input type": ["c10::BFloat16", "", "Scalar", "TensorList"], "Input Strides": [[141824512, 1], [], [], []], "Input Dims": [[8, 141824512], [], [], []], "Ev Idx": 16488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939313381.966, "dur": 23.907, + "args": { + "External id": 983589,"Record function id": 0, "Concrete Inputs": ["[4384]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345939313415.944, "dur": 10704.086, + "args": { + "External id": 983590,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[4384], [], [], [], [], [], [], []], "Ev Idx": 16490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939313419.358, "dur": 10698.795, + "args": { + "External id": 983591,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[4384], [], [], [], [], [], []], "Ev Idx": 16491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939313427.097, "dur": 9.886, + "args": { + "External id": 983592,"Record function id": 0, "Concrete Inputs": ["[4384]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939313439.690, "dur": 10667.929, + "args": { + "External id": 983593,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4384], [4384], []], "Ev Idx": 16493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345939324770.870, "dur": 51.521, + "args": { + "External id": 983594,"Record function id": 0, "Ev Idx": 16494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 0/0", "pid": 2338710, "tid": 2338710, + "ts": 6345939324824.951, "dur": 423.018, + "args": { + "External id": 983595,"Record function id": 0, "Ev Idx": 16495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345939324885.256, "dur": 346.097, + "args": { + "External id": 983596,"Sequence number": 10552471, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "long int"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32000, 4096], [8, 4096]], "Ev Idx": 16496 + } + }, + { + "ph": "s", "id": 448, "pid": 2338710, "tid": 2338710, "ts": 6345939324885.256, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_0", "pid": 2338710, "tid": 2338710, + "ts": 6345939324998.151, "dur": 171.737, + "args": { + "External id": 983597,"kernel_hash": "cvykvfdpfag3zvgkkgm7dmy4omdmsmvjcopgv6veoygd3dudukzf", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/vy/cvykvfdpfag3zvgkkgm7dmy4omdmsmvjcopgv6veoygd3dudukzf.py", "kernel_backend": "triton", "Input type": ["long int", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096], [32000, 4096], [8, 4096, 4096], []], "Ev Idx": 16497 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345939325359.902, "dur": 94.271, + "args": { + "External id": 983598,"Record function id": 0, "Ev Idx": 16498 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.0)", "pid": 2338710, "tid": 2338710, + "ts": 6345939325473.666, "dur": 7894.718, + "args": { + "External id": 983599,"Record function id": 0, "Ev Idx": 16499 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.0)", "pid": 2338710, "tid": 2338710, + "ts": 6345939325487.701, "dur": 1102.039, + "args": { + "External id": 983600,"Record function id": 0, "Ev Idx": 16500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939325599.774, "dur": 20.061, + "args": { + "External id": 983601,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345939325640.383, "dur": 46.713, + "args": { + "External id": 983602,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939325650.578, "dur": 3.045, + "args": { + "External id": 983603,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939325656.881, "dur": 0.759, + "args": { + "External id": 983604,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939325658.927, "dur": 3.007, + "args": { + "External id": 983605,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939325663.181, "dur": 0.714, + "args": { + "External id": 983606,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939325664.944, "dur": 0.388, + "args": { + "External id": 983607,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939325669.272, "dur": 0.347, + "args": { + "External id": 983608,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939325670.721, "dur": 0.629, + "args": { + "External id": 983609,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939325672.540, "dur": 3.027, + "args": { + "External id": 983610,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939325679.050, "dur": 0.634, + "args": { + "External id": 983611,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939325699.745, "dur": 64.538, + "args": { + "External id": 983612,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345939325805.078, "dur": 142.302, + "args": { + "External id": 983613,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 16513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939325819.879, "dur": 6.111, + "args": { + "External id": 983614,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345939325832.420, "dur": 12.050, + "args": { + "External id": 983615,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345939325838.028, "dur": 5.919, + "args": { + "External id": 983616,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 16516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939325841.910, "dur": 0.526, + "args": { + "External id": 983617,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345939325852.604, "dur": 30.534, + "args": { + "External id": 983618,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939325854.519, "dur": 0.705, + "args": { + "External id": 983619,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939325856.902, "dur": 0.750, + "args": { + "External id": 983620,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939325860.812, "dur": 0.593, + "args": { + "External id": 983621,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939325862.504, "dur": 0.427, + "args": { + "External id": 983622,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939325864.373, "dur": 4.921, + "args": { + "External id": 983623,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939325870.437, "dur": 0.621, + "args": { + "External id": 983624,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939325872.152, "dur": 0.376, + "args": { + "External id": 983625,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939325875.979, "dur": 0.353, + "args": { + "External id": 983626,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939325877.455, "dur": 0.627, + "args": { + "External id": 983627,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939325895.166, "dur": 41.573, + "args": { + "External id": 983628,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345939326039.222, "dur": 405.617, + "args": { + "External id": 983629,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 16529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345939326119.788, "dur": 319.001, + "args": { + "External id": 983630,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 16530, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345939326136.218, "dur": 295.933, + "args": { + "External id": 983631,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 16531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345939326474.690, "dur": 3.018, + "args": { + "External id": 983632,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 16532, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.0)", "pid": 2338710, "tid": 2338710, + "ts": 6345939326622.549, "dur": 6376.236, + "args": { + "External id": 983633,"Record function id": 0, "Ev Idx": 16533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939326771.486, "dur": 9.811, + "args": { + "External id": 983634,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 16534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939326786.273, "dur": 1.948, + "args": { + "External id": 983635,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939326790.801, "dur": 1.745, + "args": { + "External id": 983636,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939326795.836, "dur": 3.815, + "args": { + "External id": 983637,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939326801.964, "dur": 1.317, + "args": { + "External id": 983638,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939326805.756, "dur": 1.922, + "args": { + "External id": 983639,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939326810.352, "dur": 1.539, + "args": { + "External id": 983640,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939326817.209, "dur": 2.980, + "args": { + "External id": 983641,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939326822.818, "dur": 1.375, + "args": { + "External id": 983642,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939326826.885, "dur": 1.784, + "args": { + "External id": 983643,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939326853.146, "dur": 6062.031, + "args": { + "External id": 983644,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939326880.219, "dur": 6019.059, + "args": { + "External id": 983645,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939326902.281, "dur": 23.751, + "args": { + "External id": 983646,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345939326932.532, "dur": 5907.698, + "args": { + "External id": 983647,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 16547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939326935.865, "dur": 5903.099, + "args": { + "External id": 983648,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 16548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939326944.251, "dur": 10.086, + "args": { + "External id": 983649,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939326956.891, "dur": 5875.609, + "args": { + "External id": 983650,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 16550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345939333278.996, "dur": 53.256, + "args": { + "External id": 983651,"Sequence number": 10552472, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 16551 + } + }, + { + "ph": "s", "id": 447, "pid": 2338710, "tid": 2338710, "ts": 6345939333278.996, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345939333307.285, "dur": 18.009, + "args": { + "External id": 983652,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 16552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939333314.843, "dur": 9.990, + "args": { + "External id": 983653,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 16553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345939333429.547, "dur": 112.414, + "args": { + "External id": 983654,"Record function id": 0, "Ev Idx": 16554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345939333543.798, "dur": 1385.998, + "args": { + "External id": 983655,"Record function id": 0, "Ev Idx": 16555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345939333594.722, "dur": 1316.390, + "args": { + "External id": 983656,"Sequence number": 10552473, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 16556 + } + }, + { + "ph": "s", "id": 446, "pid": 2338710, "tid": 2338710, "ts": 6345939333594.722, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345939333690.667, "dur": 59.585, + "args": { + "External id": 983657,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939333767.192, "dur": 123.547, + "args": { + "External id": 983658,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939333906.046, "dur": 46.821, + "args": { + "External id": 983659,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939333963.782, "dur": 35.536, + "args": { + "External id": 983660,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345939334098.020, "dur": 38.126, + "args": { + "External id": 983661,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345939334167.591, "dur": 24.482, + "args": { + "External id": 983662,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345939334221.087, "dur": 166.395, + "args": { + "External id": 983663,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 16563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345939334286.646, "dur": 17.744, + "args": { + "External id": 983664,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 16564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939334294.481, "dur": 8.782, + "args": { + "External id": 983665,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939334308.591, "dur": 5.477, + "args": { + "External id": 983666,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939334315.660, "dur": 1.494, + "args": { + "External id": 983667,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939334320.007, "dur": 4.238, + "args": { + "External id": 983668,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939334400.972, "dur": 70.780, + "args": { + "External id": 983669,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345939334510.907, "dur": 38.141, + "args": { + "External id": 983670,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939334557.169, "dur": 53.057, + "args": { + "External id": 983671,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939334619.702, "dur": 41.426, + "args": { + "External id": 983672,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345939334690.674, "dur": 31.307, + "args": { + "External id": 983673,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 16573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939334730.886, "dur": 44.374, + "args": { + "External id": 983674,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 16574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345939334798.734, "dur": 24.434, + "args": { + "External id": 983675,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 16575 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.0)", "pid": 2338710, "tid": 2338710, + "ts": 6345939335028.318, "dur": 145.500, + "args": { + "External id": 983676,"Record function id": 0, "Ev Idx": 16576 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345939335267.598, "dur": 57.925, + "args": { + "External id": 983677,"Record function id": 0, "Ev Idx": 16577 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.1)", "pid": 2338710, "tid": 2338710, + "ts": 6345939335336.383, "dur": 27630.554, + "args": { + "External id": 983678,"Record function id": 0, "Ev Idx": 16578 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.1)", "pid": 2338710, "tid": 2338710, + "ts": 6345939335345.148, "dur": 1222.197, + "args": { + "External id": 983679,"Record function id": 0, "Ev Idx": 16579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939335441.674, "dur": 12.172, + "args": { + "External id": 983680,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345939335470.478, "dur": 42.260, + "args": { + "External id": 983681,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939335476.809, "dur": 4.768, + "args": { + "External id": 983682,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939335483.714, "dur": 0.534, + "args": { + "External id": 983683,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939335485.500, "dur": 0.711, + "args": { + "External id": 983684,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939335489.940, "dur": 0.508, + "args": { + "External id": 983685,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939335491.800, "dur": 3.509, + "args": { + "External id": 983686,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939335496.343, "dur": 0.788, + "args": { + "External id": 983687,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939335500.022, "dur": 0.523, + "args": { + "External id": 983688,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939335501.698, "dur": 0.287, + "args": { + "External id": 983689,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939335503.323, "dur": 2.153, + "args": { + "External id": 983690,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939335525.112, "dur": 66.786, + "args": { + "External id": 983691,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345939335640.635, "dur": 145.376, + "args": { + "External id": 983692,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 16592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939335653.444, "dur": 6.734, + "args": { + "External id": 983693,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345939335666.776, "dur": 12.422, + "args": { + "External id": 983694,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345939335672.073, "dur": 6.592, + "args": { + "External id": 983695,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 16595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939335676.403, "dur": 0.829, + "args": { + "External id": 983696,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345939335687.305, "dur": 33.414, + "args": { + "External id": 983697,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939335689.323, "dur": 0.700, + "args": { + "External id": 983698,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939335691.696, "dur": 5.710, + "args": { + "External id": 983699,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939335699.396, "dur": 0.527, + "args": { + "External id": 983700,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939335701.413, "dur": 0.627, + "args": { + "External id": 983701,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939335706.011, "dur": 0.388, + "args": { + "External id": 983702,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939335707.525, "dur": 0.635, + "args": { + "External id": 983703,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939335709.131, "dur": 0.329, + "args": { + "External id": 983704,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939335713.291, "dur": 0.375, + "args": { + "External id": 983705,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939335715.198, "dur": 0.243, + "args": { + "External id": 983706,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939335734.998, "dur": 39.347, + "args": { + "External id": 983707,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345939335849.937, "dur": 586.884, + "args": { + "External id": 983708,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 16608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345939335887.087, "dur": 542.574, + "args": { + "External id": 983709,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 16609, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345939335899.019, "dur": 520.140, + "args": { + "External id": 983710,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 16610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345939336472.033, "dur": 3.081, + "args": { + "External id": 983711,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 16611, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.1)", "pid": 2338710, "tid": 2338710, + "ts": 6345939336590.801, "dur": 26075.101, + "args": { + "External id": 983712,"Record function id": 0, "Ev Idx": 16612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939336707.206, "dur": 7.846, + "args": { + "External id": 983713,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 16613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939336718.854, "dur": 1.021, + "args": { + "External id": 983714,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939336721.782, "dur": 1.344, + "args": { + "External id": 983715,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939336725.085, "dur": 1.036, + "args": { + "External id": 983716,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939336727.996, "dur": 1.103, + "args": { + "External id": 983717,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939336733.013, "dur": 1.425, + "args": { + "External id": 983718,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939336736.174, "dur": 1.445, + "args": { + "External id": 983719,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939336739.240, "dur": 5.462, + "args": { + "External id": 983720,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939336746.606, "dur": 0.788, + "args": { + "External id": 983721,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939336751.428, "dur": 0.739, + "args": { + "External id": 983722,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939336772.908, "dur": 25804.159, + "args": { + "External id": 983723,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939336792.458, "dur": 25767.410, + "args": { + "External id": 983724,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939336812.187, "dur": 17.499, + "args": { + "External id": 983725,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345939336834.072, "dur": 25660.388, + "args": { + "External id": 983726,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 16626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939336837.195, "dur": 25655.737, + "args": { + "External id": 983727,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 16627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939336844.380, "dur": 6.115, + "args": { + "External id": 983728,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939336852.509, "dur": 25632.244, + "args": { + "External id": 983729,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 16629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345939362882.207, "dur": 50.867, + "args": { + "External id": 983730,"Sequence number": 10552474, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 16630 + } + }, + { + "ph": "s", "id": 445, "pid": 2338710, "tid": 2338710, "ts": 6345939362882.207, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345939362908.984, "dur": 17.343, + "args": { + "External id": 983731,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 16631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939362916.532, "dur": 9.513, + "args": { + "External id": 983732,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 16632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345939363040.271, "dur": 126.816, + "args": { + "External id": 983733,"Record function id": 0, "Ev Idx": 16633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345939363170.103, "dur": 1402.648, + "args": { + "External id": 983734,"Record function id": 0, "Ev Idx": 16634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345939363222.481, "dur": 1332.203, + "args": { + "External id": 983735,"Sequence number": 10552475, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 16635 + } + }, + { + "ph": "s", "id": 444, "pid": 2338710, "tid": 2338710, "ts": 6345939363222.481, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345939363326.946, "dur": 62.441, + "args": { + "External id": 983736,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939363408.716, "dur": 120.539, + "args": { + "External id": 983737,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939363542.437, "dur": 47.802, + "args": { + "External id": 983738,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939363601.150, "dur": 35.493, + "args": { + "External id": 983739,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345939363667.507, "dur": 31.396, + "args": { + "External id": 983740,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345939363731.220, "dur": 22.698, + "args": { + "External id": 983741,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345939363779.733, "dur": 161.333, + "args": { + "External id": 983742,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 16642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345939363840.209, "dur": 21.243, + "args": { + "External id": 983743,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 16643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939363848.030, "dur": 11.119, + "args": { + "External id": 983744,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939363864.570, "dur": 4.694, + "args": { + "External id": 983745,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939363870.623, "dur": 1.434, + "args": { + "External id": 983746,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939363874.888, "dur": 5.983, + "args": { + "External id": 983747,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939363955.354, "dur": 83.897, + "args": { + "External id": 983748,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345939364135.130, "dur": 41.811, + "args": { + "External id": 983749,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939364190.227, "dur": 59.945, + "args": { + "External id": 983750,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939364258.901, "dur": 42.472, + "args": { + "External id": 983751,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345939364331.184, "dur": 31.324, + "args": { + "External id": 983752,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 16652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939364372.225, "dur": 45.328, + "args": { + "External id": 983753,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 16653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345939364443.285, "dur": 23.727, + "args": { + "External id": 983754,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 16654 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.1)", "pid": 2338710, "tid": 2338710, + "ts": 6345939364650.047, "dur": 91.175, + "args": { + "External id": 983755,"Record function id": 0, "Ev Idx": 16655 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345939364828.280, "dur": 54.516, + "args": { + "External id": 983756,"Record function id": 0, "Ev Idx": 16656 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.2)", "pid": 2338710, "tid": 2338710, + "ts": 6345939364893.242, "dur": 29466.697, + "args": { + "External id": 983757,"Record function id": 0, "Ev Idx": 16657 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.2)", "pid": 2338710, "tid": 2338710, + "ts": 6345939364902.494, "dur": 1098.119, + "args": { + "External id": 983758,"Record function id": 0, "Ev Idx": 16658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939364996.426, "dur": 30.879, + "args": { + "External id": 983759,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345939365046.394, "dur": 87.434, + "args": { + "External id": 983760,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939365095.308, "dur": 4.334, + "args": { + "External id": 983761,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939365105.065, "dur": 0.462, + "args": { + "External id": 983762,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939365106.978, "dur": 0.523, + "args": { + "External id": 983763,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939365109.021, "dur": 0.420, + "args": { + "External id": 983764,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939365112.940, "dur": 0.466, + "args": { + "External id": 983765,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939365114.815, "dur": 0.619, + "args": { + "External id": 983766,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939365116.460, "dur": 5.965, + "args": { + "External id": 983767,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939365123.676, "dur": 0.368, + "args": { + "External id": 983768,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939365125.310, "dur": 0.385, + "args": { + "External id": 983769,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939365147.573, "dur": 66.173, + "args": { + "External id": 983770,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345939365257.891, "dur": 141.760, + "args": { + "External id": 983771,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 16671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939365273.260, "dur": 6.138, + "args": { + "External id": 983772,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345939365285.685, "dur": 12.233, + "args": { + "External id": 983773,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345939365291.382, "dur": 6.026, + "args": { + "External id": 983774,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 16674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939365295.385, "dur": 0.556, + "args": { + "External id": 983775,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345939365306.815, "dur": 33.596, + "args": { + "External id": 983776,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939365309.426, "dur": 2.887, + "args": { + "External id": 983777,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939365313.626, "dur": 0.583, + "args": { + "External id": 983778,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939365315.660, "dur": 0.506, + "args": { + "External id": 983779,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939365319.976, "dur": 2.954, + "args": { + "External id": 983780,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939365323.987, "dur": 0.556, + "args": { + "External id": 983781,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939365326.100, "dur": 0.502, + "args": { + "External id": 983782,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939365329.870, "dur": 0.508, + "args": { + "External id": 983783,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939365331.326, "dur": 0.376, + "args": { + "External id": 983784,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939365332.698, "dur": 2.717, + "args": { + "External id": 983785,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939365353.562, "dur": 36.469, + "args": { + "External id": 983786,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345939365466.734, "dur": 423.591, + "args": { + "External id": 983787,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 16687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345939365505.364, "dur": 379.026, + "args": { + "External id": 983788,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 16688, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345939365520.222, "dur": 357.419, + "args": { + "External id": 983789,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 16689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345939365916.678, "dur": 2.692, + "args": { + "External id": 983790,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 16690, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.2)", "pid": 2338710, "tid": 2338710, + "ts": 6345939366051.753, "dur": 28044.813, + "args": { + "External id": 983791,"Record function id": 0, "Ev Idx": 16691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939366231.566, "dur": 9.699, + "args": { + "External id": 983792,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 16692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939366246.018, "dur": 1.066, + "args": { + "External id": 983793,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939366249.244, "dur": 4.089, + "args": { + "External id": 983794,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939366257.474, "dur": 0.924, + "args": { + "External id": 983795,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939366260.240, "dur": 0.974, + "args": { + "External id": 983796,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939366262.751, "dur": 1.104, + "args": { + "External id": 983797,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939366265.428, "dur": 1.213, + "args": { + "External id": 983798,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939366270.978, "dur": 2.430, + "args": { + "External id": 983799,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939366275.035, "dur": 1.047, + "args": { + "External id": 983800,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939366277.787, "dur": 0.793, + "args": { + "External id": 983801,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939366301.345, "dur": 27694.472, + "args": { + "External id": 983802,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939366318.991, "dur": 27666.270, + "args": { + "External id": 983803,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939366337.538, "dur": 19.574, + "args": { + "External id": 983804,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345939366361.674, "dur": 27578.949, + "args": { + "External id": 983805,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 16705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939366365.331, "dur": 27573.596, + "args": { + "External id": 983806,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 16706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939366372.615, "dur": 6.305, + "args": { + "External id": 983807,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939366380.977, "dur": 27554.221, + "args": { + "External id": 983808,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 16708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345939394277.576, "dur": 48.373, + "args": { + "External id": 983809,"Sequence number": 10552476, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 16709 + } + }, + { + "ph": "s", "id": 443, "pid": 2338710, "tid": 2338710, "ts": 6345939394277.576, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345939394307.112, "dur": 11.847, + "args": { + "External id": 983810,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 16710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939394312.303, "dur": 6.385, + "args": { + "External id": 983811,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 16711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345939394418.122, "dur": 96.816, + "args": { + "External id": 983812,"Record function id": 0, "Ev Idx": 16712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345939394516.522, "dur": 1360.746, + "args": { + "External id": 983813,"Record function id": 0, "Ev Idx": 16713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345939394562.431, "dur": 1297.321, + "args": { + "External id": 983814,"Sequence number": 10552477, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 16714 + } + }, + { + "ph": "s", "id": 442, "pid": 2338710, "tid": 2338710, "ts": 6345939394562.431, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345939394655.276, "dur": 61.142, + "args": { + "External id": 983815,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939394732.755, "dur": 123.869, + "args": { + "External id": 983816,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939394871.507, "dur": 44.229, + "args": { + "External id": 983817,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939394925.400, "dur": 36.268, + "args": { + "External id": 983818,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345939394989.836, "dur": 53.973, + "args": { + "External id": 983819,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345939395119.246, "dur": 27.902, + "args": { + "External id": 983820,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345939395177.084, "dur": 168.189, + "args": { + "External id": 983821,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 16721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345939395241.368, "dur": 16.816, + "args": { + "External id": 983822,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 16722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939395248.917, "dur": 8.253, + "args": { + "External id": 983823,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939395261.973, "dur": 4.921, + "args": { + "External id": 983824,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939395268.424, "dur": 1.623, + "args": { + "External id": 983825,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939395272.974, "dur": 7.559, + "args": { + "External id": 983826,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939395359.142, "dur": 70.441, + "args": { + "External id": 983827,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345939395467.230, "dur": 34.898, + "args": { + "External id": 983828,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939395513.074, "dur": 51.995, + "args": { + "External id": 983829,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939395575.169, "dur": 40.931, + "args": { + "External id": 983830,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345939395644.887, "dur": 30.950, + "args": { + "External id": 983831,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 16731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939395685.988, "dur": 43.131, + "args": { + "External id": 983832,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 16732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345939395751.791, "dur": 23.304, + "args": { + "External id": 983833,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 16733 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.2)", "pid": 2338710, "tid": 2338710, + "ts": 6345939395952.053, "dur": 160.558, + "args": { + "External id": 983834,"Record function id": 0, "Ev Idx": 16734 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345939396211.343, "dur": 59.007, + "args": { + "External id": 983835,"Record function id": 0, "Ev Idx": 16735 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.3)", "pid": 2338710, "tid": 2338710, + "ts": 6345939396280.053, "dur": 28540.535, + "args": { + "External id": 983836,"Record function id": 0, "Ev Idx": 16736 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.3)", "pid": 2338710, "tid": 2338710, + "ts": 6345939396290.345, "dur": 1103.767, + "args": { + "External id": 983837,"Record function id": 0, "Ev Idx": 16737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939396386.658, "dur": 12.985, + "args": { + "External id": 983838,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345939396416.847, "dur": 43.084, + "args": { + "External id": 983839,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939396423.116, "dur": 3.037, + "args": { + "External id": 983840,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939396431.281, "dur": 0.549, + "args": { + "External id": 983841,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939396433.100, "dur": 0.583, + "args": { + "External id": 983842,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939396435.001, "dur": 0.696, + "args": { + "External id": 983843,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939396439.379, "dur": 0.545, + "args": { + "External id": 983844,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939396441.202, "dur": 0.789, + "args": { + "External id": 983845,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939396443.166, "dur": 5.226, + "args": { + "External id": 983846,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939396449.377, "dur": 0.433, + "args": { + "External id": 983847,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939396450.828, "dur": 0.671, + "args": { + "External id": 983848,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939396472.458, "dur": 67.198, + "args": { + "External id": 983849,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345939396580.326, "dur": 138.853, + "args": { + "External id": 983850,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 16750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939396594.772, "dur": 5.187, + "args": { + "External id": 983851,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345939396606.208, "dur": 12.281, + "args": { + "External id": 983852,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345939396611.885, "dur": 6.085, + "args": { + "External id": 983853,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 16753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939396615.836, "dur": 0.656, + "args": { + "External id": 983854,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345939396626.345, "dur": 30.698, + "args": { + "External id": 983855,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939396628.575, "dur": 3.062, + "args": { + "External id": 983856,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939396632.957, "dur": 0.581, + "args": { + "External id": 983857,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939396634.716, "dur": 0.369, + "args": { + "External id": 983858,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939396638.754, "dur": 2.754, + "args": { + "External id": 983859,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939396642.463, "dur": 0.348, + "args": { + "External id": 983860,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939396644.255, "dur": 0.342, + "args": { + "External id": 983861,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939396646.435, "dur": 0.427, + "args": { + "External id": 983862,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939396647.933, "dur": 0.300, + "args": { + "External id": 983863,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939396649.644, "dur": 2.147, + "args": { + "External id": 983864,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939396671.489, "dur": 38.795, + "args": { + "External id": 983865,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345939396785.128, "dur": 484.844, + "args": { + "External id": 983866,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 16766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345939396824.368, "dur": 439.233, + "args": { + "External id": 983867,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 16767, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345939396836.393, "dur": 419.500, + "args": { + "External id": 983868,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 16768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345939397304.773, "dur": 3.202, + "args": { + "External id": 983869,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 16769, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.3)", "pid": 2338710, "tid": 2338710, + "ts": 6345939397417.069, "dur": 27163.549, + "args": { + "External id": 983870,"Record function id": 0, "Ev Idx": 16770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939397536.499, "dur": 8.070, + "args": { + "External id": 983871,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 16771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939397548.297, "dur": 1.567, + "args": { + "External id": 983872,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939397551.742, "dur": 3.838, + "args": { + "External id": 983873,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939397561.317, "dur": 1.130, + "args": { + "External id": 983874,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939397564.040, "dur": 1.099, + "args": { + "External id": 983875,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939397566.602, "dur": 1.092, + "args": { + "External id": 983876,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939397571.574, "dur": 1.205, + "args": { + "External id": 983877,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939397574.453, "dur": 2.625, + "args": { + "External id": 983878,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939397579.055, "dur": 0.911, + "args": { + "External id": 983879,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939397581.323, "dur": 1.144, + "args": { + "External id": 983880,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939397605.563, "dur": 26910.695, + "args": { + "External id": 983881,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939397623.869, "dur": 26880.325, + "args": { + "External id": 983882,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939397644.180, "dur": 19.940, + "args": { + "External id": 983883,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345939397668.245, "dur": 26788.951, + "args": { + "External id": 983884,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 16784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939397671.402, "dur": 26784.673, + "args": { + "External id": 983885,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 16785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939397679.125, "dur": 6.016, + "args": { + "External id": 983886,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939397687.140, "dur": 26763.574, + "args": { + "External id": 983887,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 16787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345939424750.270, "dur": 41.510, + "args": { + "External id": 983888,"Sequence number": 10552478, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 16788 + } + }, + { + "ph": "s", "id": 441, "pid": 2338710, "tid": 2338710, "ts": 6345939424750.270, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345939424772.795, "dur": 13.299, + "args": { + "External id": 983889,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 16789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939424778.837, "dur": 6.997, + "args": { + "External id": 983890,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 16790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345939424869.865, "dur": 93.706, + "args": { + "External id": 983891,"Record function id": 0, "Ev Idx": 16791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345939424964.895, "dur": 1405.793, + "args": { + "External id": 983892,"Record function id": 0, "Ev Idx": 16792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345939425026.075, "dur": 1326.400, + "args": { + "External id": 983893,"Sequence number": 10552479, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 16793 + } + }, + { + "ph": "s", "id": 440, "pid": 2338710, "tid": 2338710, "ts": 6345939425026.075, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345939425151.188, "dur": 64.849, + "args": { + "External id": 983894,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939425234.128, "dur": 122.134, + "args": { + "External id": 983895,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939425371.752, "dur": 45.495, + "args": { + "External id": 983896,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939425428.035, "dur": 33.850, + "args": { + "External id": 983897,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345939425493.045, "dur": 30.907, + "args": { + "External id": 983898,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345939425548.051, "dur": 20.528, + "args": { + "External id": 983899,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345939425596.771, "dur": 173.651, + "args": { + "External id": 983900,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 16800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345939425658.599, "dur": 16.949, + "args": { + "External id": 983901,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 16801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939425666.163, "dur": 8.202, + "args": { + "External id": 983902,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939425686.957, "dur": 6.497, + "args": { + "External id": 983903,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939425695.083, "dur": 1.766, + "args": { + "External id": 983904,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939425702.379, "dur": 6.021, + "args": { + "External id": 983905,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939425783.455, "dur": 62.458, + "args": { + "External id": 983906,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345939425886.203, "dur": 37.038, + "args": { + "External id": 983907,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939425935.022, "dur": 50.592, + "args": { + "External id": 983908,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939425992.934, "dur": 107.031, + "args": { + "External id": 983909,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345939426131.147, "dur": 34.401, + "args": { + "External id": 983910,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 16810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939426173.463, "dur": 49.911, + "args": { + "External id": 983911,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 16811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345939426246.821, "dur": 24.106, + "args": { + "External id": 983912,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 16812 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.3)", "pid": 2338710, "tid": 2338710, + "ts": 6345939426450.624, "dur": 89.753, + "args": { + "External id": 983913,"Record function id": 0, "Ev Idx": 16813 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345939426627.849, "dur": 58.271, + "args": { + "External id": 983914,"Record function id": 0, "Ev Idx": 16814 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.4)", "pid": 2338710, "tid": 2338710, + "ts": 6345939426695.696, "dur": 30310.946, + "args": { + "External id": 983915,"Record function id": 0, "Ev Idx": 16815 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.4)", "pid": 2338710, "tid": 2338710, + "ts": 6345939426705.403, "dur": 1092.890, + "args": { + "External id": 983916,"Record function id": 0, "Ev Idx": 16816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939426801.006, "dur": 11.056, + "args": { + "External id": 983917,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345939426827.199, "dur": 42.351, + "args": { + "External id": 983918,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939426833.259, "dur": 2.723, + "args": { + "External id": 983919,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939426841.036, "dur": 0.720, + "args": { + "External id": 983920,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939426843.162, "dur": 0.377, + "args": { + "External id": 983921,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939426845.059, "dur": 0.691, + "args": { + "External id": 983922,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939426849.465, "dur": 0.517, + "args": { + "External id": 983923,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939426851.160, "dur": 0.396, + "args": { + "External id": 983924,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939426853.059, "dur": 5.258, + "args": { + "External id": 983925,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939426859.576, "dur": 0.654, + "args": { + "External id": 983926,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939426861.557, "dur": 0.510, + "args": { + "External id": 983927,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939426881.279, "dur": 60.543, + "args": { + "External id": 983928,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345939426979.255, "dur": 219.824, + "args": { + "External id": 983929,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 16829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939426991.163, "dur": 4.554, + "args": { + "External id": 983930,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345939427002.003, "dur": 36.191, + "args": { + "External id": 983931,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345939427028.129, "dur": 9.491, + "args": { + "External id": 983932,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 16832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939427034.358, "dur": 1.008, + "args": { + "External id": 983933,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345939427047.350, "dur": 81.015, + "args": { + "External id": 983934,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939427049.922, "dur": 42.029, + "args": { + "External id": 983935,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939427101.500, "dur": 0.595, + "args": { + "External id": 983936,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939427103.404, "dur": 0.356, + "args": { + "External id": 983937,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939427106.937, "dur": 3.243, + "args": { + "External id": 983938,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939427111.459, "dur": 0.568, + "args": { + "External id": 983939,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939427113.274, "dur": 2.490, + "args": { + "External id": 983940,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939427116.894, "dur": 0.285, + "args": { + "External id": 983941,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939427118.297, "dur": 0.434, + "args": { + "External id": 983942,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939427122.115, "dur": 0.527, + "args": { + "External id": 983943,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939427144.099, "dur": 44.785, + "args": { + "External id": 983944,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345939427268.813, "dur": 419.356, + "args": { + "External id": 983945,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 16845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345939427307.487, "dur": 374.889, + "args": { + "External id": 983946,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 16846, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345939427319.738, "dur": 356.249, + "args": { + "External id": 983947,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 16847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345939427715.629, "dur": 2.530, + "args": { + "External id": 983948,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 16848, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.4)", "pid": 2338710, "tid": 2338710, + "ts": 6345939427820.824, "dur": 28949.535, + "args": { + "External id": 983949,"Record function id": 0, "Ev Idx": 16849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939427933.815, "dur": 6.992, + "args": { + "External id": 983950,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 16850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939427944.698, "dur": 1.393, + "args": { + "External id": 983951,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939427948.170, "dur": 4.070, + "args": { + "External id": 983952,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939427954.483, "dur": 0.942, + "args": { + "External id": 983953,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939427957.186, "dur": 1.180, + "args": { + "External id": 983954,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939427959.822, "dur": 0.909, + "args": { + "External id": 983955,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939427965.166, "dur": 1.027, + "args": { + "External id": 983956,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939427968.025, "dur": 2.071, + "args": { + "External id": 983957,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939427972.022, "dur": 1.068, + "args": { + "External id": 983958,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939427974.738, "dur": 0.791, + "args": { + "External id": 983959,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939427997.938, "dur": 28718.661, + "args": { + "External id": 983960,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939428039.140, "dur": 28666.773, + "args": { + "External id": 983961,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939428100.356, "dur": 23.685, + "args": { + "External id": 983962,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345939428128.776, "dur": 28534.026, + "args": { + "External id": 983963,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 16863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939428132.040, "dur": 28529.403, + "args": { + "External id": 983964,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 16864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939428139.006, "dur": 6.981, + "args": { + "External id": 983965,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939428148.179, "dur": 28509.242, + "args": { + "External id": 983966,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 16866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345939456939.245, "dur": 38.813, + "args": { + "External id": 983967,"Sequence number": 10552480, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 16867 + } + }, + { + "ph": "s", "id": 439, "pid": 2338710, "tid": 2338710, "ts": 6345939456939.245, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345939456960.600, "dur": 11.690, + "args": { + "External id": 983968,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 16868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939456966.004, "dur": 6.047, + "args": { + "External id": 983969,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 16869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345939457101.609, "dur": 92.233, + "args": { + "External id": 983970,"Record function id": 0, "Ev Idx": 16870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345939457196.346, "dur": 1341.974, + "args": { + "External id": 983971,"Record function id": 0, "Ev Idx": 16871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345939457245.174, "dur": 1274.841, + "args": { + "External id": 983972,"Sequence number": 10552481, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 16872 + } + }, + { + "ph": "s", "id": 438, "pid": 2338710, "tid": 2338710, "ts": 6345939457245.174, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345939457334.605, "dur": 62.868, + "args": { + "External id": 983973,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939457415.879, "dur": 122.909, + "args": { + "External id": 983974,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939457553.326, "dur": 44.103, + "args": { + "External id": 983975,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939457607.797, "dur": 35.789, + "args": { + "External id": 983976,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345939457672.937, "dur": 32.495, + "args": { + "External id": 983977,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345939457729.244, "dur": 22.685, + "args": { + "External id": 983978,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345939457779.280, "dur": 160.291, + "args": { + "External id": 983979,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 16879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345939457837.717, "dur": 15.892, + "args": { + "External id": 983980,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 16880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939457845.113, "dur": 7.536, + "args": { + "External id": 983981,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939457857.742, "dur": 4.734, + "args": { + "External id": 983982,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939457863.865, "dur": 1.592, + "args": { + "External id": 983983,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939457868.204, "dur": 7.139, + "args": { + "External id": 983984,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939457952.097, "dur": 79.176, + "args": { + "External id": 983985,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345939458117.666, "dur": 38.052, + "args": { + "External id": 983986,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939458169.451, "dur": 60.126, + "args": { + "External id": 983987,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939458239.644, "dur": 41.013, + "args": { + "External id": 983988,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345939458308.849, "dur": 29.591, + "args": { + "External id": 983989,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 16889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939458347.253, "dur": 40.539, + "args": { + "External id": 983990,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 16890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345939458408.816, "dur": 27.790, + "args": { + "External id": 983991,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 16891 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.4)", "pid": 2338710, "tid": 2338710, + "ts": 6345939458616.181, "dur": 91.638, + "args": { + "External id": 983992,"Record function id": 0, "Ev Idx": 16892 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345939458797.243, "dur": 57.841, + "args": { + "External id": 983993,"Record function id": 0, "Ev Idx": 16893 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.5)", "pid": 2338710, "tid": 2338710, + "ts": 6345939458865.528, "dur": 29845.729, + "args": { + "External id": 983994,"Record function id": 0, "Ev Idx": 16894 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.5)", "pid": 2338710, "tid": 2338710, + "ts": 6345939458875.741, "dur": 1101.760, + "args": { + "External id": 983995,"Record function id": 0, "Ev Idx": 16895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939458970.808, "dur": 10.919, + "args": { + "External id": 983996,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345939458997.001, "dur": 106.742, + "args": { + "External id": 983997,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939459003.342, "dur": 3.352, + "args": { + "External id": 983998,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939459033.097, "dur": 0.951, + "args": { + "External id": 983999,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939459035.867, "dur": 0.766, + "args": { + "External id": 984000,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939459038.000, "dur": 0.623, + "args": { + "External id": 984001,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939459042.152, "dur": 0.363, + "args": { + "External id": 984002,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939459043.546, "dur": 0.521, + "args": { + "External id": 984003,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939459045.142, "dur": 5.031, + "args": { + "External id": 984004,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939459051.384, "dur": 40.576, + "args": { + "External id": 984005,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939459095.980, "dur": 0.417, + "args": { + "External id": 984006,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939459118.201, "dur": 67.327, + "args": { + "External id": 984007,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345939459227.993, "dur": 159.836, + "args": { + "External id": 984008,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 16908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939459245.403, "dur": 6.161, + "args": { + "External id": 984009,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345939459257.801, "dur": 12.590, + "args": { + "External id": 984010,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345939459263.864, "dur": 6.001, + "args": { + "External id": 984011,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 16911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939459267.734, "dur": 0.589, + "args": { + "External id": 984012,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345939459278.708, "dur": 43.102, + "args": { + "External id": 984013,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939459281.395, "dur": 2.634, + "args": { + "External id": 984014,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939459285.221, "dur": 0.674, + "args": { + "External id": 984015,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939459287.467, "dur": 0.684, + "args": { + "External id": 984016,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939459291.441, "dur": 3.223, + "args": { + "External id": 984017,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939459304.811, "dur": 0.684, + "args": { + "External id": 984018,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939459308.001, "dur": 0.393, + "args": { + "External id": 984019,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939459311.388, "dur": 0.313, + "args": { + "External id": 984020,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939459312.527, "dur": 0.276, + "args": { + "External id": 984021,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939459314.098, "dur": 2.414, + "args": { + "External id": 984022,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939459339.725, "dur": 38.496, + "args": { + "External id": 984023,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345939459457.848, "dur": 411.735, + "args": { + "External id": 984024,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 16924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345939459494.061, "dur": 369.914, + "args": { + "External id": 984025,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 16925, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345939459506.507, "dur": 351.182, + "args": { + "External id": 984026,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 16926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345939459895.859, "dur": 2.958, + "args": { + "External id": 984027,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 16927, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.5)", "pid": 2338710, "tid": 2338710, + "ts": 6345939460001.457, "dur": 28439.487, + "args": { + "External id": 984028,"Record function id": 0, "Ev Idx": 16928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939460189.044, "dur": 8.271, + "args": { + "External id": 984029,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 16929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939460201.628, "dur": 1.005, + "args": { + "External id": 984030,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939460204.587, "dur": 3.647, + "args": { + "External id": 984031,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939460210.113, "dur": 1.315, + "args": { + "External id": 984032,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939460212.946, "dur": 0.760, + "args": { + "External id": 984033,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939460217.918, "dur": 1.209, + "args": { + "External id": 984034,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939460220.818, "dur": 1.088, + "args": { + "External id": 984035,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939460223.388, "dur": 2.704, + "args": { + "External id": 984036,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939460227.606, "dur": 0.899, + "args": { + "External id": 984037,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939460232.294, "dur": 0.856, + "args": { + "External id": 984038,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939460255.932, "dur": 28124.258, + "args": { + "External id": 984039,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939460275.634, "dur": 28094.404, + "args": { + "External id": 984040,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939460294.828, "dur": 20.303, + "args": { + "External id": 984041,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345939460319.390, "dur": 28004.501, + "args": { + "External id": 984042,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 16942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939460322.617, "dur": 27999.562, + "args": { + "External id": 984043,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 16943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939460329.372, "dur": 5.984, + "args": { + "External id": 984044,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939460337.477, "dur": 27980.758, + "args": { + "External id": 984045,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 16945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345939488627.033, "dur": 44.072, + "args": { + "External id": 984046,"Sequence number": 10552482, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 16946 + } + }, + { + "ph": "s", "id": 437, "pid": 2338710, "tid": 2338710, "ts": 6345939488627.033, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345939488652.264, "dur": 11.967, + "args": { + "External id": 984047,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 16947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939488657.649, "dur": 6.327, + "args": { + "External id": 984048,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 16948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345939488766.172, "dur": 89.613, + "args": { + "External id": 984049,"Record function id": 0, "Ev Idx": 16949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345939488857.439, "dur": 1416.136, + "args": { + "External id": 984050,"Record function id": 0, "Ev Idx": 16950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345939488903.779, "dur": 1352.421, + "args": { + "External id": 984051,"Sequence number": 10552483, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 16951 + } + }, + { + "ph": "s", "id": 436, "pid": 2338710, "tid": 2338710, "ts": 6345939488903.779, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345939488988.875, "dur": 114.620, + "args": { + "External id": 984052,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939489126.927, "dur": 118.937, + "args": { + "External id": 984053,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939489260.809, "dur": 45.176, + "args": { + "External id": 984054,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939489316.311, "dur": 34.624, + "args": { + "External id": 984055,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345939489383.277, "dur": 34.461, + "args": { + "External id": 984056,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345939489441.784, "dur": 21.602, + "args": { + "External id": 984057,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345939489491.067, "dur": 162.585, + "args": { + "External id": 984058,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 16958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345939489552.381, "dur": 15.649, + "args": { + "External id": 984059,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 16959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939489559.848, "dur": 7.000, + "args": { + "External id": 984060,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939489572.161, "dur": 5.047, + "args": { + "External id": 984061,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939489578.933, "dur": 1.696, + "args": { + "External id": 984062,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939489583.416, "dur": 7.865, + "args": { + "External id": 984063,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939489668.255, "dur": 60.891, + "args": { + "External id": 984064,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345939489792.294, "dur": 37.137, + "args": { + "External id": 984065,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939489841.629, "dur": 50.662, + "args": { + "External id": 984066,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939489899.482, "dur": 41.400, + "args": { + "External id": 984067,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345939489968.161, "dur": 31.742, + "args": { + "External id": 984068,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 16968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939490028.785, "dur": 89.378, + "args": { + "External id": 984069,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 16969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345939490145.931, "dur": 24.702, + "args": { + "External id": 984070,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 16970 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.5)", "pid": 2338710, "tid": 2338710, + "ts": 6345939490350.321, "dur": 92.959, + "args": { + "External id": 984071,"Record function id": 0, "Ev Idx": 16971 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345939490530.216, "dur": 54.481, + "args": { + "External id": 984072,"Record function id": 0, "Ev Idx": 16972 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.6)", "pid": 2338710, "tid": 2338710, + "ts": 6345939490594.504, "dur": 30451.538, + "args": { + "External id": 984073,"Record function id": 0, "Ev Idx": 16973 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.6)", "pid": 2338710, "tid": 2338710, + "ts": 6345939490605.239, "dur": 1133.235, + "args": { + "External id": 984074,"Record function id": 0, "Ev Idx": 16974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939490696.262, "dur": 12.059, + "args": { + "External id": 984075,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345939490724.096, "dur": 38.694, + "args": { + "External id": 984076,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939490730.196, "dur": 2.563, + "args": { + "External id": 984077,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939490737.465, "dur": 0.431, + "args": { + "External id": 984078,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939490739.076, "dur": 0.535, + "args": { + "External id": 984079,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939490740.734, "dur": 0.397, + "args": { + "External id": 984080,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939490744.278, "dur": 0.532, + "args": { + "External id": 984081,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939490745.865, "dur": 0.431, + "args": { + "External id": 984082,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939490747.629, "dur": 5.191, + "args": { + "External id": 984083,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939490753.940, "dur": 0.479, + "args": { + "External id": 984084,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939490755.353, "dur": 0.619, + "args": { + "External id": 984085,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939490778.774, "dur": 64.081, + "args": { + "External id": 984086,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345939490881.860, "dur": 159.431, + "args": { + "External id": 984087,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 16987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939490895.882, "dur": 7.143, + "args": { + "External id": 984088,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345939490909.846, "dur": 11.759, + "args": { + "External id": 984089,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345939490915.280, "dur": 5.816, + "args": { + "External id": 984090,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 16990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939490919.030, "dur": 0.729, + "args": { + "External id": 984091,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345939490929.576, "dur": 29.571, + "args": { + "External id": 984092,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939490931.736, "dur": 0.492, + "args": { + "External id": 984093,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939490933.692, "dur": 3.339, + "args": { + "External id": 984094,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939490938.225, "dur": 0.408, + "args": { + "External id": 984095,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939490940.028, "dur": 2.537, + "args": { + "External id": 984096,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939490945.519, "dur": 0.444, + "args": { + "External id": 984097,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939490946.878, "dur": 0.596, + "args": { + "External id": 984098,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939490948.642, "dur": 0.463, + "args": { + "External id": 984099,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939490952.085, "dur": 0.482, + "args": { + "External id": 984100,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939490953.485, "dur": 0.445, + "args": { + "External id": 984101,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939490973.707, "dur": 54.991, + "args": { + "External id": 984102,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345939491154.072, "dur": 469.049, + "args": { + "External id": 984103,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345939491189.990, "dur": 426.817, + "args": { + "External id": 984104,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17004, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345939491202.119, "dur": 407.434, + "args": { + "External id": 984105,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345939491650.800, "dur": 2.179, + "args": { + "External id": 984106,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17006, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.6)", "pid": 2338710, "tid": 2338710, + "ts": 6345939491763.268, "dur": 29015.219, + "args": { + "External id": 984107,"Record function id": 0, "Ev Idx": 17007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939491880.042, "dur": 8.015, + "args": { + "External id": 984108,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939491891.901, "dur": 1.404, + "args": { + "External id": 984109,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939491895.362, "dur": 3.619, + "args": { + "External id": 984110,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939491901.165, "dur": 0.945, + "args": { + "External id": 984111,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939491903.546, "dur": 1.083, + "args": { + "External id": 984112,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939491907.610, "dur": 1.182, + "args": { + "External id": 984113,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939491913.341, "dur": 1.036, + "args": { + "External id": 984114,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939491916.424, "dur": 2.839, + "args": { + "External id": 984115,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939491920.968, "dur": 0.701, + "args": { + "External id": 984116,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939491923.495, "dur": 0.844, + "args": { + "External id": 984117,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939491946.916, "dur": 28771.572, + "args": { + "External id": 984118,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939491965.366, "dur": 28742.157, + "args": { + "External id": 984119,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939491982.475, "dur": 19.695, + "args": { + "External id": 984120,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345939492006.616, "dur": 28654.103, + "args": { + "External id": 984121,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939492032.489, "dur": 28627.587, + "args": { + "External id": 984122,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939492040.344, "dur": 8.568, + "args": { + "External id": 984123,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939492050.985, "dur": 28603.638, + "args": { + "External id": 984124,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345939520956.758, "dur": 40.146, + "args": { + "External id": 984125,"Sequence number": 10552484, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17025 + } + }, + { + "ph": "s", "id": 435, "pid": 2338710, "tid": 2338710, "ts": 6345939520956.758, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345939520980.506, "dur": 10.536, + "args": { + "External id": 984126,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939520985.630, "dur": 5.212, + "args": { + "External id": 984127,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345939521131.693, "dur": 91.749, + "args": { + "External id": 984128,"Record function id": 0, "Ev Idx": 17028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345939521225.896, "dur": 1318.746, + "args": { + "External id": 984129,"Record function id": 0, "Ev Idx": 17029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345939521273.137, "dur": 1254.594, + "args": { + "External id": 984130,"Sequence number": 10552485, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17030 + } + }, + { + "ph": "s", "id": 434, "pid": 2338710, "tid": 2338710, "ts": 6345939521273.137, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345939521361.047, "dur": 61.949, + "args": { + "External id": 984131,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939521440.712, "dur": 121.428, + "args": { + "External id": 984132,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939521578.238, "dur": 44.266, + "args": { + "External id": 984133,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939521633.326, "dur": 34.583, + "args": { + "External id": 984134,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345939521698.729, "dur": 31.653, + "args": { + "External id": 984135,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345939521754.973, "dur": 21.264, + "args": { + "External id": 984136,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345939521803.800, "dur": 153.879, + "args": { + "External id": 984137,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345939521861.750, "dur": 15.383, + "args": { + "External id": 984138,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939521869.060, "dur": 6.883, + "args": { + "External id": 984139,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939521881.426, "dur": 4.706, + "args": { + "External id": 984140,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939521887.632, "dur": 1.530, + "args": { + "External id": 984141,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939521892.192, "dur": 6.277, + "args": { + "External id": 984142,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939521969.722, "dur": 78.305, + "args": { + "External id": 984143,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345939522135.834, "dur": 38.572, + "args": { + "External id": 984144,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939522187.154, "dur": 54.884, + "args": { + "External id": 984145,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939522252.954, "dur": 40.168, + "args": { + "External id": 984146,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345939522320.117, "dur": 29.803, + "args": { + "External id": 984147,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939522359.724, "dur": 41.409, + "args": { + "External id": 984148,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345939522425.083, "dur": 20.139, + "args": { + "External id": 984149,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17049 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.6)", "pid": 2338710, "tid": 2338710, + "ts": 6345939522621.314, "dur": 90.349, + "args": { + "External id": 984150,"Record function id": 0, "Ev Idx": 17050 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345939522802.348, "dur": 58.404, + "args": { + "External id": 984151,"Record function id": 0, "Ev Idx": 17051 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.7)", "pid": 2338710, "tid": 2338710, + "ts": 6345939522870.803, "dur": 30670.748, + "args": { + "External id": 984152,"Record function id": 0, "Ev Idx": 17052 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.7)", "pid": 2338710, "tid": 2338710, + "ts": 6345939522879.783, "dur": 1048.901, + "args": { + "External id": 984153,"Record function id": 0, "Ev Idx": 17053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939522972.789, "dur": 11.440, + "args": { + "External id": 984154,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345939523000.103, "dur": 107.330, + "args": { + "External id": 984155,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939523006.218, "dur": 23.506, + "args": { + "External id": 984156,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939523036.409, "dur": 0.690, + "args": { + "External id": 984157,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939523038.290, "dur": 0.517, + "args": { + "External id": 984158,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939523040.219, "dur": 0.444, + "args": { + "External id": 984159,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939523043.783, "dur": 0.608, + "args": { + "External id": 984160,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939523045.926, "dur": 0.671, + "args": { + "External id": 984161,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939523047.903, "dur": 44.053, + "args": { + "External id": 984162,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939523097.229, "dur": 0.463, + "args": { + "External id": 984163,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939523099.106, "dur": 0.687, + "args": { + "External id": 984164,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939523122.489, "dur": 68.222, + "args": { + "External id": 984165,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345939523241.392, "dur": 158.127, + "args": { + "External id": 984166,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939523255.978, "dur": 8.122, + "args": { + "External id": 984167,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345939523270.891, "dur": 12.489, + "args": { + "External id": 984168,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345939523276.356, "dur": 6.484, + "args": { + "External id": 984169,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939523280.624, "dur": 0.607, + "args": { + "External id": 984170,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345939523292.550, "dur": 31.830, + "args": { + "External id": 984171,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939523294.928, "dur": 0.634, + "args": { + "External id": 984172,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939523297.635, "dur": 2.403, + "args": { + "External id": 984173,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939523301.679, "dur": 0.580, + "args": { + "External id": 984174,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939523303.897, "dur": 2.608, + "args": { + "External id": 984175,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939523309.669, "dur": 0.328, + "args": { + "External id": 984176,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939523311.801, "dur": 0.432, + "args": { + "External id": 984177,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939523313.726, "dur": 0.482, + "args": { + "External id": 984178,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939523317.405, "dur": 0.453, + "args": { + "External id": 984179,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939523319.698, "dur": 0.513, + "args": { + "External id": 984180,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939523342.514, "dur": 47.089, + "args": { + "External id": 984181,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345939523468.118, "dur": 354.191, + "args": { + "External id": 984182,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345939523504.578, "dur": 312.195, + "args": { + "External id": 984183,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17083, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345939523516.856, "dur": 293.334, + "args": { + "External id": 984184,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345939523849.152, "dur": 2.473, + "args": { + "External id": 984185,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17085, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.7)", "pid": 2338710, "tid": 2338710, + "ts": 6345939523951.695, "dur": 29344.821, + "args": { + "External id": 984186,"Record function id": 0, "Ev Idx": 17086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939524137.761, "dur": 7.933, + "args": { + "External id": 984187,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939524150.715, "dur": 1.318, + "args": { + "External id": 984188,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939524154.265, "dur": 3.727, + "args": { + "External id": 984189,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939524160.220, "dur": 1.057, + "args": { + "External id": 984190,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939524163.288, "dur": 0.952, + "args": { + "External id": 984191,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939524165.955, "dur": 0.732, + "args": { + "External id": 984192,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939524170.944, "dur": 0.893, + "args": { + "External id": 984193,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939524173.484, "dur": 2.282, + "args": { + "External id": 984194,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939524177.613, "dur": 0.736, + "args": { + "External id": 984195,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939524179.834, "dur": 0.735, + "args": { + "External id": 984196,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939524204.942, "dur": 29037.624, + "args": { + "External id": 984197,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939524224.038, "dur": 29008.348, + "args": { + "External id": 984198,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939524243.126, "dur": 20.976, + "args": { + "External id": 984199,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345939524268.541, "dur": 28919.908, + "args": { + "External id": 984200,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939524271.821, "dur": 28915.944, + "args": { + "External id": 984201,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939524278.503, "dur": 8.625, + "args": { + "External id": 984202,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939524289.415, "dur": 28893.354, + "args": { + "External id": 984203,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345939553468.919, "dur": 41.891, + "args": { + "External id": 984204,"Sequence number": 10552486, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17104 + } + }, + { + "ph": "s", "id": 433, "pid": 2338710, "tid": 2338710, "ts": 6345939553468.919, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345939553492.150, "dur": 11.942, + "args": { + "External id": 984205,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939553497.786, "dur": 6.117, + "args": { + "External id": 984206,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345939553592.489, "dur": 92.121, + "args": { + "External id": 984207,"Record function id": 0, "Ev Idx": 17107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345939553686.326, "dur": 1418.208, + "args": { + "External id": 984208,"Record function id": 0, "Ev Idx": 17108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345939553735.416, "dur": 1309.670, + "args": { + "External id": 984209,"Sequence number": 10552487, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17109 + } + }, + { + "ph": "s", "id": 432, "pid": 2338710, "tid": 2338710, "ts": 6345939553735.416, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345939553822.921, "dur": 60.168, + "args": { + "External id": 984210,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939553901.318, "dur": 136.845, + "args": { + "External id": 984211,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939554098.209, "dur": 61.156, + "args": { + "External id": 984212,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939554171.058, "dur": 35.741, + "args": { + "External id": 984213,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345939554240.330, "dur": 34.429, + "args": { + "External id": 984214,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345939554300.821, "dur": 19.805, + "args": { + "External id": 984215,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345939554348.552, "dur": 163.767, + "args": { + "External id": 984216,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345939554410.197, "dur": 15.522, + "args": { + "External id": 984217,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939554417.530, "dur": 7.209, + "args": { + "External id": 984218,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939554430.266, "dur": 4.484, + "args": { + "External id": 984219,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939554436.149, "dur": 1.166, + "args": { + "External id": 984220,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939554440.272, "dur": 8.394, + "args": { + "External id": 984221,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939554527.113, "dur": 62.713, + "args": { + "External id": 984222,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345939554629.048, "dur": 34.060, + "args": { + "External id": 984223,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939554674.416, "dur": 50.908, + "args": { + "External id": 984224,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939554733.971, "dur": 42.079, + "args": { + "External id": 984225,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345939554809.740, "dur": 34.843, + "args": { + "External id": 984226,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939554854.350, "dur": 46.308, + "args": { + "External id": 984227,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345939554921.316, "dur": 23.693, + "args": { + "External id": 984228,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17128 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.7)", "pid": 2338710, "tid": 2338710, + "ts": 6345939555185.688, "dur": 95.575, + "args": { + "External id": 984229,"Record function id": 0, "Ev Idx": 17129 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345939555373.806, "dur": 59.561, + "args": { + "External id": 984230,"Record function id": 0, "Ev Idx": 17130 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.8)", "pid": 2338710, "tid": 2338710, + "ts": 6345939555443.617, "dur": 31453.359, + "args": { + "External id": 984231,"Record function id": 0, "Ev Idx": 17131 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.8)", "pid": 2338710, "tid": 2338710, + "ts": 6345939555453.033, "dur": 1110.893, + "args": { + "External id": 984232,"Record function id": 0, "Ev Idx": 17132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939555551.912, "dur": 12.140, + "args": { + "External id": 984233,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345939555580.302, "dur": 41.894, + "args": { + "External id": 984234,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939555586.542, "dur": 2.696, + "args": { + "External id": 984235,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939555593.661, "dur": 0.639, + "args": { + "External id": 984236,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939555595.798, "dur": 0.777, + "args": { + "External id": 984237,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939555597.965, "dur": 2.122, + "args": { + "External id": 984238,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939555601.650, "dur": 0.424, + "args": { + "External id": 984239,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939555603.740, "dur": 0.616, + "args": { + "External id": 984240,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939555607.836, "dur": 3.012, + "args": { + "External id": 984241,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939555612.747, "dur": 0.489, + "args": { + "External id": 984242,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939555614.599, "dur": 0.611, + "args": { + "External id": 984243,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939555634.735, "dur": 66.985, + "args": { + "External id": 984244,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345939555741.448, "dur": 140.825, + "args": { + "External id": 984245,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939555756.272, "dur": 4.751, + "args": { + "External id": 984246,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345939555767.074, "dur": 16.844, + "args": { + "External id": 984247,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345939555775.237, "dur": 8.195, + "args": { + "External id": 984248,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939555779.290, "dur": 2.683, + "args": { + "External id": 984249,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345939555791.490, "dur": 32.927, + "args": { + "External id": 984250,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939555793.650, "dur": 0.672, + "args": { + "External id": 984251,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939555796.001, "dur": 1.091, + "args": { + "External id": 984252,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939555798.606, "dur": 0.430, + "args": { + "External id": 984253,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939555802.817, "dur": 3.090, + "args": { + "External id": 984254,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939555807.875, "dur": 0.363, + "args": { + "External id": 984255,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939555810.477, "dur": 2.200, + "args": { + "External id": 984256,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939555813.931, "dur": 0.275, + "args": { + "External id": 984257,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939555815.816, "dur": 0.415, + "args": { + "External id": 984258,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939555819.674, "dur": 0.547, + "args": { + "External id": 984259,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939555836.799, "dur": 36.160, + "args": { + "External id": 984260,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345939555945.044, "dur": 496.911, + "args": { + "External id": 984261,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345939555982.864, "dur": 452.205, + "args": { + "External id": 984262,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17162, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345939555994.329, "dur": 433.065, + "args": { + "External id": 984263,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345939556473.576, "dur": 3.011, + "args": { + "External id": 984264,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17164, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.8)", "pid": 2338710, "tid": 2338710, + "ts": 6345939556588.756, "dur": 30051.463, + "args": { + "External id": 984265,"Record function id": 0, "Ev Idx": 17165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939556701.996, "dur": 8.020, + "args": { + "External id": 984266,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939556714.198, "dur": 1.341, + "args": { + "External id": 984267,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939556717.769, "dur": 3.802, + "args": { + "External id": 984268,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939556723.993, "dur": 1.336, + "args": { + "External id": 984269,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939556727.063, "dur": 0.896, + "args": { + "External id": 984270,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939556729.396, "dur": 1.045, + "args": { + "External id": 984271,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939556734.586, "dur": 0.877, + "args": { + "External id": 984272,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939556737.581, "dur": 2.315, + "args": { + "External id": 984273,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939556741.766, "dur": 0.966, + "args": { + "External id": 984274,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939556744.562, "dur": 0.771, + "args": { + "External id": 984275,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939556768.560, "dur": 29814.311, + "args": { + "External id": 984276,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939556787.061, "dur": 29785.605, + "args": { + "External id": 984277,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939556803.931, "dur": 18.926, + "args": { + "External id": 984278,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345939556827.480, "dur": 29701.039, + "args": { + "External id": 984279,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939556830.526, "dur": 29696.214, + "args": { + "External id": 984280,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939556837.987, "dur": 6.679, + "args": { + "External id": 984281,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939556846.507, "dur": 29676.183, + "args": { + "External id": 984282,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345939586823.319, "dur": 39.381, + "args": { + "External id": 984283,"Sequence number": 10552488, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17183 + } + }, + { + "ph": "s", "id": 431, "pid": 2338710, "tid": 2338710, "ts": 6345939586823.319, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345939586844.033, "dur": 12.139, + "args": { + "External id": 984284,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939586849.494, "dur": 6.402, + "args": { + "External id": 984285,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345939586945.992, "dur": 138.914, + "args": { + "External id": 984286,"Record function id": 0, "Ev Idx": 17186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345939587089.463, "dur": 1330.130, + "args": { + "External id": 984287,"Record function id": 0, "Ev Idx": 17187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345939587140.094, "dur": 1263.087, + "args": { + "External id": 984288,"Sequence number": 10552489, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17188 + } + }, + { + "ph": "s", "id": 430, "pid": 2338710, "tid": 2338710, "ts": 6345939587140.094, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345939587227.812, "dur": 62.356, + "args": { + "External id": 984289,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939587308.283, "dur": 123.484, + "args": { + "External id": 984290,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939587448.823, "dur": 43.673, + "args": { + "External id": 984291,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939587504.127, "dur": 35.732, + "args": { + "External id": 984292,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345939587570.119, "dur": 30.952, + "args": { + "External id": 984293,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345939587624.415, "dur": 20.772, + "args": { + "External id": 984294,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345939587671.999, "dur": 158.159, + "args": { + "External id": 984295,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345939587729.648, "dur": 15.275, + "args": { + "External id": 984296,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939587736.836, "dur": 6.989, + "args": { + "External id": 984297,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939587749.174, "dur": 5.086, + "args": { + "External id": 984298,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939587755.680, "dur": 1.208, + "args": { + "External id": 984299,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939587759.934, "dur": 6.494, + "args": { + "External id": 984300,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939587844.050, "dur": 59.951, + "args": { + "External id": 984301,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345939587941.908, "dur": 36.917, + "args": { + "External id": 984302,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939587989.854, "dur": 116.218, + "args": { + "External id": 984303,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939588121.289, "dur": 47.906, + "args": { + "External id": 984304,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345939588199.110, "dur": 31.972, + "args": { + "External id": 984305,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939588239.989, "dur": 42.876, + "args": { + "External id": 984306,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345939588302.895, "dur": 23.291, + "args": { + "External id": 984307,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17207 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.8)", "pid": 2338710, "tid": 2338710, + "ts": 6345939588496.440, "dur": 88.197, + "args": { + "External id": 984308,"Record function id": 0, "Ev Idx": 17208 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345939588671.285, "dur": 53.679, + "args": { + "External id": 984309,"Record function id": 0, "Ev Idx": 17209 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.9)", "pid": 2338710, "tid": 2338710, + "ts": 6345939588734.457, "dur": 31780.203, + "args": { + "External id": 984310,"Record function id": 0, "Ev Idx": 17210 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.9)", "pid": 2338710, "tid": 2338710, + "ts": 6345939588744.815, "dur": 1095.439, + "args": { + "External id": 984311,"Record function id": 0, "Ev Idx": 17211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939588835.372, "dur": 10.447, + "args": { + "External id": 984312,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345939588862.576, "dur": 40.713, + "args": { + "External id": 984313,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939588868.708, "dur": 2.549, + "args": { + "External id": 984314,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939588876.531, "dur": 0.585, + "args": { + "External id": 984315,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939588878.477, "dur": 0.628, + "args": { + "External id": 984316,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939588880.684, "dur": 0.566, + "args": { + "External id": 984317,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939588884.194, "dur": 0.510, + "args": { + "External id": 984318,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939588886.255, "dur": 0.336, + "args": { + "External id": 984319,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939588888.060, "dur": 4.342, + "args": { + "External id": 984320,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939588894.276, "dur": 0.435, + "args": { + "External id": 984321,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939588895.909, "dur": 0.331, + "args": { + "External id": 984322,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939588914.822, "dur": 56.276, + "args": { + "External id": 984323,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345939589033.476, "dur": 196.628, + "args": { + "External id": 984324,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939589049.433, "dur": 46.121, + "args": { + "External id": 984325,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345939589105.695, "dur": 17.000, + "args": { + "External id": 984326,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345939589114.069, "dur": 8.075, + "args": { + "External id": 984327,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939589119.150, "dur": 0.723, + "args": { + "External id": 984328,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345939589132.762, "dur": 31.685, + "args": { + "External id": 984329,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939589135.113, "dur": 2.239, + "args": { + "External id": 984330,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939589139.061, "dur": 0.463, + "args": { + "External id": 984331,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939589140.703, "dur": 0.654, + "args": { + "External id": 984332,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939589144.850, "dur": 2.729, + "args": { + "External id": 984333,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939589148.792, "dur": 0.553, + "args": { + "External id": 984334,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939589150.972, "dur": 0.370, + "args": { + "External id": 984335,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939589154.509, "dur": 0.271, + "args": { + "External id": 984336,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939589156.532, "dur": 0.423, + "args": { + "External id": 984337,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939589158.289, "dur": 1.468, + "args": { + "External id": 984338,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939589181.313, "dur": 39.011, + "args": { + "External id": 984339,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345939589297.695, "dur": 432.862, + "args": { + "External id": 984340,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345939589334.535, "dur": 390.384, + "args": { + "External id": 984341,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17241, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345939589346.779, "dur": 367.397, + "args": { + "External id": 984342,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345939589757.495, "dur": 2.246, + "args": { + "External id": 984343,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17243, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.9)", "pid": 2338710, "tid": 2338710, + "ts": 6345939589863.715, "dur": 30389.754, + "args": { + "External id": 984344,"Record function id": 0, "Ev Idx": 17244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939589977.548, "dur": 6.687, + "args": { + "External id": 984345,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939589988.120, "dur": 1.260, + "args": { + "External id": 984346,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939589991.494, "dur": 3.396, + "args": { + "External id": 984347,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939589996.764, "dur": 1.044, + "args": { + "External id": 984348,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939589999.230, "dur": 1.027, + "args": { + "External id": 984349,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939590001.628, "dur": 1.404, + "args": { + "External id": 984350,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939590028.416, "dur": 1.958, + "args": { + "External id": 984351,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939590035.764, "dur": 2.478, + "args": { + "External id": 984352,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939590039.957, "dur": 0.740, + "args": { + "External id": 984353,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939590042.655, "dur": 0.713, + "args": { + "External id": 984354,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939590108.712, "dur": 30086.184, + "args": { + "External id": 984355,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939590130.410, "dur": 30053.223, + "args": { + "External id": 984356,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939590150.150, "dur": 20.500, + "args": { + "External id": 984357,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345939590175.588, "dur": 29963.219, + "args": { + "External id": 984358,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939590178.631, "dur": 29958.442, + "args": { + "External id": 984359,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939590185.067, "dur": 6.391, + "args": { + "External id": 984360,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939590193.189, "dur": 29940.484, + "args": { + "External id": 984361,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345939620437.945, "dur": 41.509, + "args": { + "External id": 984362,"Sequence number": 10552490, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17262 + } + }, + { + "ph": "s", "id": 429, "pid": 2338710, "tid": 2338710, "ts": 6345939620437.945, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345939620460.493, "dur": 12.234, + "args": { + "External id": 984363,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939620465.872, "dur": 6.585, + "args": { + "External id": 984364,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345939620567.819, "dur": 85.582, + "args": { + "External id": 984365,"Record function id": 0, "Ev Idx": 17265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345939620655.695, "dur": 1432.173, + "args": { + "External id": 984366,"Record function id": 0, "Ev Idx": 17266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345939620705.400, "dur": 1326.720, + "args": { + "External id": 984367,"Sequence number": 10552491, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17267 + } + }, + { + "ph": "s", "id": 428, "pid": 2338710, "tid": 2338710, "ts": 6345939620705.400, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345939620793.149, "dur": 60.584, + "args": { + "External id": 984368,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939620871.666, "dur": 122.896, + "args": { + "External id": 984369,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939621034.840, "dur": 98.431, + "args": { + "External id": 984370,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939621150.886, "dur": 40.480, + "args": { + "External id": 984371,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345939621225.567, "dur": 35.096, + "args": { + "External id": 984372,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345939621287.743, "dur": 19.868, + "args": { + "External id": 984373,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345939621335.529, "dur": 177.650, + "args": { + "External id": 984374,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345939621405.964, "dur": 15.286, + "args": { + "External id": 984375,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939621413.034, "dur": 7.024, + "args": { + "External id": 984376,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939621425.695, "dur": 5.473, + "args": { + "External id": 984377,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939621432.730, "dur": 1.247, + "args": { + "External id": 984378,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939621439.124, "dur": 6.862, + "args": { + "External id": 984379,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939621528.273, "dur": 63.396, + "args": { + "External id": 984380,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345939621631.658, "dur": 33.708, + "args": { + "External id": 984381,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939621677.251, "dur": 52.245, + "args": { + "External id": 984382,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939621737.425, "dur": 41.113, + "args": { + "External id": 984383,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345939621805.705, "dur": 32.106, + "args": { + "External id": 984384,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939621844.772, "dur": 42.687, + "args": { + "External id": 984385,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345939621910.744, "dur": 22.373, + "args": { + "External id": 984386,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17286 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.9)", "pid": 2338710, "tid": 2338710, + "ts": 6345939622174.161, "dur": 93.523, + "args": { + "External id": 984387,"Record function id": 0, "Ev Idx": 17287 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345939622357.600, "dur": 57.691, + "args": { + "External id": 984388,"Record function id": 0, "Ev Idx": 17288 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.10)", "pid": 2338710, "tid": 2338710, + "ts": 6345939622425.687, "dur": 30371.309, + "args": { + "External id": 984389,"Record function id": 0, "Ev Idx": 17289 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.10)", "pid": 2338710, "tid": 2338710, + "ts": 6345939622435.221, "dur": 1120.267, + "args": { + "External id": 984390,"Record function id": 0, "Ev Idx": 17290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939622529.657, "dur": 12.065, + "args": { + "External id": 984391,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345939622558.756, "dur": 45.281, + "args": { + "External id": 984392,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939622565.327, "dur": 2.889, + "args": { + "External id": 984393,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939622572.574, "dur": 0.467, + "args": { + "External id": 984394,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939622575.274, "dur": 0.587, + "args": { + "External id": 984395,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939622577.856, "dur": 0.887, + "args": { + "External id": 984396,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939622584.268, "dur": 0.553, + "args": { + "External id": 984397,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939622586.202, "dur": 0.645, + "args": { + "External id": 984398,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939622588.479, "dur": 4.943, + "args": { + "External id": 984399,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939622594.873, "dur": 0.494, + "args": { + "External id": 984400,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939622596.712, "dur": 0.375, + "args": { + "External id": 984401,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939622617.388, "dur": 66.645, + "args": { + "External id": 984402,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345939622723.828, "dur": 142.379, + "args": { + "External id": 984403,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939622738.541, "dur": 4.416, + "args": { + "External id": 984404,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345939622749.196, "dur": 15.046, + "args": { + "External id": 984405,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345939622757.361, "dur": 6.211, + "args": { + "External id": 984406,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939622761.663, "dur": 0.471, + "args": { + "External id": 984407,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345939622772.143, "dur": 34.492, + "args": { + "External id": 984408,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939622774.135, "dur": 2.909, + "args": { + "External id": 984409,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939622778.636, "dur": 0.547, + "args": { + "External id": 984410,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939622780.872, "dur": 0.495, + "args": { + "External id": 984411,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939622785.778, "dur": 2.801, + "args": { + "External id": 984412,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939622789.537, "dur": 0.328, + "args": { + "External id": 984413,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939622791.401, "dur": 0.635, + "args": { + "External id": 984414,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939622795.341, "dur": 0.339, + "args": { + "External id": 984415,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939622797.555, "dur": 0.400, + "args": { + "External id": 984416,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939622799.345, "dur": 2.266, + "args": { + "External id": 984417,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939622819.569, "dur": 37.333, + "args": { + "External id": 984418,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345939622927.773, "dur": 504.930, + "args": { + "External id": 984419,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345939622965.945, "dur": 460.039, + "args": { + "External id": 984420,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17320, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345939622978.009, "dur": 440.498, + "args": { + "External id": 984421,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345939623465.819, "dur": 2.617, + "args": { + "External id": 984422,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17322, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.10)", "pid": 2338710, "tid": 2338710, + "ts": 6345939623579.140, "dur": 28964.979, + "args": { + "External id": 984423,"Record function id": 0, "Ev Idx": 17323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939623696.759, "dur": 8.107, + "args": { + "External id": 984424,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939623708.907, "dur": 1.216, + "args": { + "External id": 984425,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939623712.291, "dur": 3.573, + "args": { + "External id": 984426,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939623717.873, "dur": 0.964, + "args": { + "External id": 984427,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939623720.549, "dur": 0.815, + "args": { + "External id": 984428,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939623722.763, "dur": 0.814, + "args": { + "External id": 984429,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939623728.179, "dur": 1.069, + "args": { + "External id": 984430,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939623731.125, "dur": 2.151, + "args": { + "External id": 984431,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939623734.952, "dur": 0.616, + "args": { + "External id": 984432,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939623737.465, "dur": 0.823, + "args": { + "External id": 984433,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939623760.038, "dur": 28727.419, + "args": { + "External id": 984434,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939623779.464, "dur": 28697.635, + "args": { + "External id": 984435,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939623797.377, "dur": 19.609, + "args": { + "External id": 984436,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345939623821.353, "dur": 28614.053, + "args": { + "External id": 984437,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939623824.342, "dur": 28609.294, + "args": { + "External id": 984438,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939623830.693, "dur": 5.958, + "args": { + "External id": 984439,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939623838.530, "dur": 28591.241, + "args": { + "External id": 984440,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345939652719.380, "dur": 45.024, + "args": { + "External id": 984441,"Sequence number": 10552492, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17341 + } + }, + { + "ph": "s", "id": 427, "pid": 2338710, "tid": 2338710, "ts": 6345939652719.380, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345939652745.682, "dur": 12.009, + "args": { + "External id": 984442,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939652751.648, "dur": 5.780, + "args": { + "External id": 984443,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345939652851.210, "dur": 92.079, + "args": { + "External id": 984444,"Record function id": 0, "Ev Idx": 17344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345939652945.431, "dur": 1379.125, + "args": { + "External id": 984445,"Record function id": 0, "Ev Idx": 17345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345939652991.673, "dur": 1315.871, + "args": { + "External id": 984446,"Sequence number": 10552493, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17346 + } + }, + { + "ph": "s", "id": 426, "pid": 2338710, "tid": 2338710, "ts": 6345939652991.673, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345939653129.657, "dur": 63.630, + "args": { + "External id": 984447,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939653211.809, "dur": 123.792, + "args": { + "External id": 984448,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939653352.595, "dur": 44.862, + "args": { + "External id": 984449,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939653407.194, "dur": 34.248, + "args": { + "External id": 984450,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345939653472.798, "dur": 33.297, + "args": { + "External id": 984451,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345939653536.720, "dur": 21.299, + "args": { + "External id": 984452,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345939653583.881, "dur": 159.748, + "args": { + "External id": 984453,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345939653645.733, "dur": 14.571, + "args": { + "External id": 984454,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939653652.736, "dur": 6.625, + "args": { + "External id": 984455,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939653664.763, "dur": 4.532, + "args": { + "External id": 984456,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939653670.776, "dur": 1.339, + "args": { + "External id": 984457,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939653675.114, "dur": 7.355, + "args": { + "External id": 984458,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939653756.780, "dur": 58.344, + "args": { + "External id": 984459,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345939653852.743, "dur": 33.611, + "args": { + "External id": 984460,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939653898.589, "dur": 50.998, + "args": { + "External id": 984461,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939653957.576, "dur": 40.574, + "args": { + "External id": 984462,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345939654048.565, "dur": 74.180, + "args": { + "External id": 984463,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939654134.281, "dur": 49.499, + "args": { + "External id": 984464,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345939654209.593, "dur": 22.799, + "args": { + "External id": 984465,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17365 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.10)", "pid": 2338710, "tid": 2338710, + "ts": 6345939654403.396, "dur": 94.539, + "args": { + "External id": 984466,"Record function id": 0, "Ev Idx": 17366 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345939654587.440, "dur": 54.755, + "args": { + "External id": 984467,"Record function id": 0, "Ev Idx": 17367 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.11)", "pid": 2338710, "tid": 2338710, + "ts": 6345939654652.539, "dur": 30485.748, + "args": { + "External id": 984468,"Record function id": 0, "Ev Idx": 17368 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.11)", "pid": 2338710, "tid": 2338710, + "ts": 6345939654661.275, "dur": 1151.807, + "args": { + "External id": 984469,"Record function id": 0, "Ev Idx": 17369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939654754.481, "dur": 12.152, + "args": { + "External id": 984470,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345939654783.036, "dur": 40.450, + "args": { + "External id": 984471,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939654788.817, "dur": 2.397, + "args": { + "External id": 984472,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939654796.774, "dur": 0.316, + "args": { + "External id": 984473,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939654798.229, "dur": 0.478, + "args": { + "External id": 984474,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939654800.346, "dur": 0.554, + "args": { + "External id": 984475,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939654803.555, "dur": 0.607, + "args": { + "External id": 984476,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939654806.232, "dur": 0.543, + "args": { + "External id": 984477,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939654808.046, "dur": 4.144, + "args": { + "External id": 984478,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939654814.541, "dur": 0.289, + "args": { + "External id": 984479,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939654816.069, "dur": 0.363, + "args": { + "External id": 984480,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939654837.112, "dur": 62.366, + "args": { + "External id": 984481,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345939654944.259, "dur": 220.464, + "args": { + "External id": 984482,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939654958.869, "dur": 4.401, + "args": { + "External id": 984483,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345939654969.070, "dur": 18.376, + "args": { + "External id": 984484,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345939654977.085, "dur": 9.815, + "args": { + "External id": 984485,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939654981.466, "dur": 0.482, + "args": { + "External id": 984486,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345939654995.272, "dur": 97.075, + "args": { + "External id": 984487,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939654997.708, "dur": 2.292, + "args": { + "External id": 984488,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939655001.926, "dur": 0.555, + "args": { + "External id": 984489,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939655003.913, "dur": 0.579, + "args": { + "External id": 984490,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939655029.500, "dur": 4.107, + "args": { + "External id": 984491,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939655036.371, "dur": 0.402, + "args": { + "External id": 984492,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939655038.486, "dur": 0.506, + "args": { + "External id": 984493,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939655041.121, "dur": 0.659, + "args": { + "External id": 984494,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939655043.791, "dur": 0.344, + "args": { + "External id": 984495,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939655045.864, "dur": 1.862, + "args": { + "External id": 984496,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939655113.075, "dur": 41.892, + "args": { + "External id": 984497,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345939655233.008, "dur": 468.107, + "args": { + "External id": 984498,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345939655269.805, "dur": 424.937, + "args": { + "External id": 984499,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17399, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345939655282.124, "dur": 406.046, + "args": { + "External id": 984500,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345939655729.189, "dur": 2.686, + "args": { + "External id": 984501,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17401, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.11)", "pid": 2338710, "tid": 2338710, + "ts": 6345939655836.099, "dur": 29000.532, + "args": { + "External id": 984502,"Record function id": 0, "Ev Idx": 17402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939655951.345, "dur": 7.236, + "args": { + "External id": 984503,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939655962.084, "dur": 1.177, + "args": { + "External id": 984504,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939655965.174, "dur": 3.396, + "args": { + "External id": 984505,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939655970.676, "dur": 0.970, + "args": { + "External id": 984506,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939655973.301, "dur": 1.060, + "args": { + "External id": 984507,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939655976.053, "dur": 1.063, + "args": { + "External id": 984508,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939655981.326, "dur": 0.941, + "args": { + "External id": 984509,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939655983.982, "dur": 2.425, + "args": { + "External id": 984510,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939655988.006, "dur": 0.706, + "args": { + "External id": 984511,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939655990.416, "dur": 0.750, + "args": { + "External id": 984512,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939656037.795, "dur": 28740.243, + "args": { + "External id": 984513,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939656096.904, "dur": 28670.043, + "args": { + "External id": 984514,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939656117.035, "dur": 21.591, + "args": { + "External id": 984515,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345939656143.110, "dur": 28578.585, + "args": { + "External id": 984516,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939656146.588, "dur": 28574.158, + "args": { + "External id": 984517,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939656152.416, "dur": 7.504, + "args": { + "External id": 984518,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939656162.081, "dur": 28553.792, + "args": { + "External id": 984519,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345939685030.272, "dur": 75.440, + "args": { + "External id": 984520,"Sequence number": 10552494, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17420 + } + }, + { + "ph": "s", "id": 425, "pid": 2338710, "tid": 2338710, "ts": 6345939685030.272, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345939685086.326, "dur": 13.343, + "args": { + "External id": 984521,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939685091.947, "dur": 7.278, + "args": { + "External id": 984522,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345939685189.638, "dur": 90.866, + "args": { + "External id": 984523,"Record function id": 0, "Ev Idx": 17423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345939685282.465, "dur": 1325.411, + "args": { + "External id": 984524,"Record function id": 0, "Ev Idx": 17424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345939685328.656, "dur": 1262.902, + "args": { + "External id": 984525,"Sequence number": 10552495, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17425 + } + }, + { + "ph": "s", "id": 424, "pid": 2338710, "tid": 2338710, "ts": 6345939685328.656, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345939685413.493, "dur": 61.718, + "args": { + "External id": 984526,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939685491.422, "dur": 120.244, + "args": { + "External id": 984527,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939685628.367, "dur": 42.566, + "args": { + "External id": 984528,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939685680.742, "dur": 34.005, + "args": { + "External id": 984529,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345939685749.625, "dur": 33.084, + "args": { + "External id": 984530,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345939685804.682, "dur": 20.770, + "args": { + "External id": 984531,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345939685853.211, "dur": 183.203, + "args": { + "External id": 984532,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345939685912.890, "dur": 15.700, + "args": { + "External id": 984533,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939685920.203, "dur": 7.203, + "args": { + "External id": 984534,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939685933.206, "dur": 4.540, + "args": { + "External id": 984535,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939685939.105, "dur": 1.322, + "args": { + "External id": 984536,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939685943.392, "dur": 8.386, + "args": { + "External id": 984537,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939686092.204, "dur": 73.530, + "args": { + "External id": 984538,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345939686209.568, "dur": 34.181, + "args": { + "External id": 984539,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939686257.490, "dur": 54.014, + "args": { + "External id": 984540,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939686319.509, "dur": 40.440, + "args": { + "External id": 984541,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345939686390.941, "dur": 28.836, + "args": { + "External id": 984542,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939686427.960, "dur": 43.038, + "args": { + "External id": 984543,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345939686494.310, "dur": 21.272, + "args": { + "External id": 984544,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17444 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.11)", "pid": 2338710, "tid": 2338710, + "ts": 6345939686684.360, "dur": 88.735, + "args": { + "External id": 984545,"Record function id": 0, "Ev Idx": 17445 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345939686862.252, "dur": 55.968, + "args": { + "External id": 984546,"Record function id": 0, "Ev Idx": 17446 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.12)", "pid": 2338710, "tid": 2338710, + "ts": 6345939686928.998, "dur": 31747.343, + "args": { + "External id": 984547,"Record function id": 0, "Ev Idx": 17447 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.12)", "pid": 2338710, "tid": 2338710, + "ts": 6345939686938.097, "dur": 1199.025, + "args": { + "External id": 984548,"Record function id": 0, "Ev Idx": 17448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939687094.940, "dur": 13.099, + "args": { + "External id": 984549,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345939687128.180, "dur": 39.908, + "args": { + "External id": 984550,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939687134.426, "dur": 2.889, + "args": { + "External id": 984551,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939687141.373, "dur": 0.522, + "args": { + "External id": 984552,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939687143.527, "dur": 0.716, + "args": { + "External id": 984553,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939687145.728, "dur": 0.764, + "args": { + "External id": 984554,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939687149.231, "dur": 0.554, + "args": { + "External id": 984555,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939687151.044, "dur": 0.680, + "args": { + "External id": 984556,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939687153.584, "dur": 3.260, + "args": { + "External id": 984557,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939687158.560, "dur": 0.583, + "args": { + "External id": 984558,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939687160.421, "dur": 0.404, + "args": { + "External id": 984559,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939687184.478, "dur": 68.470, + "args": { + "External id": 984560,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345939687294.329, "dur": 143.575, + "args": { + "External id": 984561,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939687309.291, "dur": 5.599, + "args": { + "External id": 984562,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345939687321.378, "dur": 15.807, + "args": { + "External id": 984563,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345939687329.871, "dur": 6.783, + "args": { + "External id": 984564,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939687334.299, "dur": 0.697, + "args": { + "External id": 984565,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345939687345.545, "dur": 32.388, + "args": { + "External id": 984566,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939687348.192, "dur": 0.506, + "args": { + "External id": 984567,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939687350.480, "dur": 2.022, + "args": { + "External id": 984568,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939687353.913, "dur": 0.565, + "args": { + "External id": 984569,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939687356.032, "dur": 2.885, + "args": { + "External id": 984570,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939687361.840, "dur": 0.585, + "args": { + "External id": 984571,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939687363.992, "dur": 0.590, + "args": { + "External id": 984572,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939687366.215, "dur": 0.569, + "args": { + "External id": 984573,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939687370.674, "dur": 0.360, + "args": { + "External id": 984574,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939687372.603, "dur": 0.402, + "args": { + "External id": 984575,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939687391.731, "dur": 36.752, + "args": { + "External id": 984576,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345939687530.648, "dur": 419.109, + "args": { + "External id": 984577,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345939687567.183, "dur": 377.104, + "args": { + "External id": 984578,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17478, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345939687580.146, "dur": 358.091, + "args": { + "External id": 984579,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345939687977.235, "dur": 2.825, + "args": { + "External id": 984580,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17480, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.12)", "pid": 2338710, "tid": 2338710, + "ts": 6345939688165.936, "dur": 30237.432, + "args": { + "External id": 984581,"Record function id": 0, "Ev Idx": 17481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939688287.236, "dur": 8.118, + "args": { + "External id": 984582,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939688299.023, "dur": 1.514, + "args": { + "External id": 984583,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939688302.653, "dur": 3.588, + "args": { + "External id": 984584,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939688308.036, "dur": 1.082, + "args": { + "External id": 984585,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939688310.932, "dur": 0.923, + "args": { + "External id": 984586,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939688315.680, "dur": 0.901, + "args": { + "External id": 984587,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939688318.241, "dur": 1.142, + "args": { + "External id": 984588,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939688321.281, "dur": 2.487, + "args": { + "External id": 984589,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939688325.375, "dur": 0.933, + "args": { + "External id": 984590,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939688330.081, "dur": 0.629, + "args": { + "External id": 984591,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939688352.307, "dur": 29994.203, + "args": { + "External id": 984592,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939688371.279, "dur": 29964.662, + "args": { + "External id": 984593,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939688389.451, "dur": 19.324, + "args": { + "External id": 984594,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345939688413.150, "dur": 29879.276, + "args": { + "External id": 984595,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939688416.290, "dur": 29875.022, + "args": { + "External id": 984596,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939688422.295, "dur": 6.660, + "args": { + "External id": 984597,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939688431.095, "dur": 29855.435, + "args": { + "External id": 984598,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345939718596.647, "dur": 40.612, + "args": { + "External id": 984599,"Sequence number": 10552496, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17499 + } + }, + { + "ph": "s", "id": 423, "pid": 2338710, "tid": 2338710, "ts": 6345939718596.647, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345939718618.013, "dur": 12.243, + "args": { + "External id": 984600,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939718623.670, "dur": 6.377, + "args": { + "External id": 984601,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345939718731.620, "dur": 89.727, + "args": { + "External id": 984602,"Record function id": 0, "Ev Idx": 17502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345939718823.116, "dur": 1415.356, + "args": { + "External id": 984603,"Record function id": 0, "Ev Idx": 17503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345939718869.711, "dur": 1351.646, + "args": { + "External id": 984604,"Sequence number": 10552497, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17504 + } + }, + { + "ph": "s", "id": 422, "pid": 2338710, "tid": 2338710, "ts": 6345939718869.711, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345939718962.716, "dur": 80.447, + "args": { + "External id": 984605,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939719102.212, "dur": 124.919, + "args": { + "External id": 984606,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939719246.006, "dur": 45.998, + "args": { + "External id": 984607,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939719301.981, "dur": 35.881, + "args": { + "External id": 984608,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345939719372.742, "dur": 31.863, + "args": { + "External id": 984609,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345939719427.661, "dur": 23.479, + "args": { + "External id": 984610,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345939719478.087, "dur": 160.256, + "args": { + "External id": 984611,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345939719538.803, "dur": 15.310, + "args": { + "External id": 984612,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939719546.509, "dur": 6.705, + "args": { + "External id": 984613,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939719558.334, "dur": 4.727, + "args": { + "External id": 984614,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939719564.376, "dur": 1.150, + "args": { + "External id": 984615,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939719568.710, "dur": 6.357, + "args": { + "External id": 984616,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939719650.476, "dur": 59.456, + "args": { + "External id": 984617,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345939719747.192, "dur": 34.399, + "args": { + "External id": 984618,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939719792.867, "dur": 54.494, + "args": { + "External id": 984619,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939719856.255, "dur": 43.937, + "args": { + "External id": 984620,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345939719928.391, "dur": 36.210, + "args": { + "External id": 984621,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939719972.329, "dur": 69.779, + "args": { + "External id": 984622,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345939720106.852, "dur": 26.479, + "args": { + "External id": 984623,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17523 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.12)", "pid": 2338710, "tid": 2338710, + "ts": 6345939720319.107, "dur": 89.660, + "args": { + "External id": 984624,"Record function id": 0, "Ev Idx": 17524 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345939720496.533, "dur": 51.819, + "args": { + "External id": 984625,"Record function id": 0, "Ev Idx": 17525 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.13)", "pid": 2338710, "tid": 2338710, + "ts": 6345939720558.229, "dur": 30874.058, + "args": { + "External id": 984626,"Record function id": 0, "Ev Idx": 17526 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.13)", "pid": 2338710, "tid": 2338710, + "ts": 6345939720567.452, "dur": 1088.066, + "args": { + "External id": 984627,"Record function id": 0, "Ev Idx": 17527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939720660.484, "dur": 12.156, + "args": { + "External id": 984628,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345939720689.292, "dur": 40.208, + "args": { + "External id": 984629,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939720695.671, "dur": 2.701, + "args": { + "External id": 984630,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939720702.848, "dur": 0.500, + "args": { + "External id": 984631,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939720705.077, "dur": 0.350, + "args": { + "External id": 984632,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939720707.190, "dur": 0.517, + "args": { + "External id": 984633,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939720710.568, "dur": 0.452, + "args": { + "External id": 984634,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939720712.520, "dur": 0.699, + "args": { + "External id": 984635,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939720714.834, "dur": 3.750, + "args": { + "External id": 984636,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939720720.414, "dur": 0.402, + "args": { + "External id": 984637,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939720722.295, "dur": 0.417, + "args": { + "External id": 984638,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939720743.271, "dur": 61.013, + "args": { + "External id": 984639,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345939720848.039, "dur": 143.597, + "args": { + "External id": 984640,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939720863.001, "dur": 4.432, + "args": { + "External id": 984641,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345939720873.600, "dur": 19.805, + "args": { + "External id": 984642,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345939720879.160, "dur": 13.687, + "args": { + "External id": 984643,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939720890.749, "dur": 0.537, + "args": { + "External id": 984644,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345939720901.292, "dur": 31.321, + "args": { + "External id": 984645,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939720903.441, "dur": 2.367, + "args": { + "External id": 984646,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939720907.381, "dur": 0.283, + "args": { + "External id": 984647,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939720908.841, "dur": 0.510, + "args": { + "External id": 984648,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939720913.112, "dur": 2.455, + "args": { + "External id": 984649,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939720917.104, "dur": 0.572, + "args": { + "External id": 984650,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939720919.447, "dur": 0.606, + "args": { + "External id": 984651,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939720922.754, "dur": 0.600, + "args": { + "External id": 984652,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939720924.933, "dur": 0.674, + "args": { + "External id": 984653,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939720926.527, "dur": 1.714, + "args": { + "External id": 984654,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939720947.023, "dur": 35.748, + "args": { + "External id": 984655,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345939721120.188, "dur": 421.600, + "args": { + "External id": 984656,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345939721157.778, "dur": 377.665, + "args": { + "External id": 984657,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17557, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345939721170.703, "dur": 357.937, + "args": { + "External id": 984658,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345939721567.932, "dur": 3.104, + "args": { + "External id": 984659,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17559, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.13)", "pid": 2338710, "tid": 2338710, + "ts": 6345939721680.540, "dur": 29509.580, + "args": { + "External id": 984660,"Record function id": 0, "Ev Idx": 17560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939721798.826, "dur": 7.804, + "args": { + "External id": 984661,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939721811.282, "dur": 1.378, + "args": { + "External id": 984662,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939721814.738, "dur": 3.856, + "args": { + "External id": 984663,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939721820.626, "dur": 0.812, + "args": { + "External id": 984664,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939721823.410, "dur": 1.092, + "args": { + "External id": 984665,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939721828.390, "dur": 0.894, + "args": { + "External id": 984666,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939721831.184, "dur": 1.069, + "args": { + "External id": 984667,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939721833.899, "dur": 2.749, + "args": { + "External id": 984668,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939721838.499, "dur": 0.976, + "args": { + "External id": 984669,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939721843.583, "dur": 0.884, + "args": { + "External id": 984670,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939721866.917, "dur": 29267.181, + "args": { + "External id": 984671,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939721886.298, "dur": 29236.766, + "args": { + "External id": 984672,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939721904.543, "dur": 19.006, + "args": { + "External id": 984673,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345939721927.772, "dur": 29120.402, + "args": { + "External id": 984674,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939721930.753, "dur": 29115.847, + "args": { + "External id": 984675,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939721936.453, "dur": 6.193, + "args": { + "External id": 984676,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939721944.748, "dur": 29097.843, + "args": { + "External id": 984677,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345939751358.456, "dur": 42.892, + "args": { + "External id": 984678,"Sequence number": 10552498, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17578 + } + }, + { + "ph": "s", "id": 421, "pid": 2338710, "tid": 2338710, "ts": 6345939751358.456, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345939751383.804, "dur": 12.110, + "args": { + "External id": 984679,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939751389.454, "dur": 6.240, + "args": { + "External id": 984680,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345939751480.893, "dur": 84.955, + "args": { + "External id": 984681,"Record function id": 0, "Ev Idx": 17581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345939751567.281, "dur": 1327.008, + "args": { + "External id": 984682,"Record function id": 0, "Ev Idx": 17582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345939751613.301, "dur": 1264.900, + "args": { + "External id": 984683,"Sequence number": 10552499, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17583 + } + }, + { + "ph": "s", "id": 420, "pid": 2338710, "tid": 2338710, "ts": 6345939751613.301, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345939751699.905, "dur": 60.914, + "args": { + "External id": 984684,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939751775.275, "dur": 125.081, + "args": { + "External id": 984685,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939751915.756, "dur": 45.276, + "args": { + "External id": 984686,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939751970.637, "dur": 36.085, + "args": { + "External id": 984687,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345939752106.929, "dur": 37.800, + "args": { + "External id": 984688,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345939752171.844, "dur": 23.913, + "args": { + "External id": 984689,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345939752221.475, "dur": 161.709, + "args": { + "External id": 984690,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345939752283.943, "dur": 14.407, + "args": { + "External id": 984691,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939752291.356, "dur": 6.055, + "args": { + "External id": 984692,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939752301.534, "dur": 4.969, + "args": { + "External id": 984693,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939752307.770, "dur": 3.064, + "args": { + "External id": 984694,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939752313.800, "dur": 7.138, + "args": { + "External id": 984695,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939752396.226, "dur": 69.025, + "args": { + "External id": 984696,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345939752499.838, "dur": 35.494, + "args": { + "External id": 984697,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939752547.889, "dur": 49.577, + "args": { + "External id": 984698,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939752604.958, "dur": 40.616, + "args": { + "External id": 984699,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345939752674.338, "dur": 29.273, + "args": { + "External id": 984700,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939752709.966, "dur": 45.358, + "args": { + "External id": 984701,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345939752776.298, "dur": 25.420, + "args": { + "External id": 984702,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17602 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.13)", "pid": 2338710, "tid": 2338710, + "ts": 6345939752969.981, "dur": 165.126, + "args": { + "External id": 984703,"Record function id": 0, "Ev Idx": 17603 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345939753231.864, "dur": 55.076, + "args": { + "External id": 984704,"Record function id": 0, "Ev Idx": 17604 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.14)", "pid": 2338710, "tid": 2338710, + "ts": 6345939753297.866, "dur": 31292.427, + "args": { + "External id": 984705,"Record function id": 0, "Ev Idx": 17605 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.14)", "pid": 2338710, "tid": 2338710, + "ts": 6345939753306.413, "dur": 1104.274, + "args": { + "External id": 984706,"Record function id": 0, "Ev Idx": 17606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939753403.167, "dur": 12.582, + "args": { + "External id": 984707,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345939753432.692, "dur": 41.688, + "args": { + "External id": 984708,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939753439.212, "dur": 2.894, + "args": { + "External id": 984709,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939753446.321, "dur": 0.655, + "args": { + "External id": 984710,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939753448.311, "dur": 0.449, + "args": { + "External id": 984711,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939753450.429, "dur": 0.741, + "args": { + "External id": 984712,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939753453.940, "dur": 0.341, + "args": { + "External id": 984713,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939753456.130, "dur": 0.606, + "args": { + "External id": 984714,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939753458.249, "dur": 4.086, + "args": { + "External id": 984715,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939753464.308, "dur": 0.681, + "args": { + "External id": 984716,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939753466.373, "dur": 0.785, + "args": { + "External id": 984717,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939753487.258, "dur": 67.724, + "args": { + "External id": 984718,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345939753593.947, "dur": 143.263, + "args": { + "External id": 984719,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939753609.667, "dur": 5.545, + "args": { + "External id": 984720,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345939753623.799, "dur": 12.948, + "args": { + "External id": 984721,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345939753629.601, "dur": 6.611, + "args": { + "External id": 984722,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939753634.208, "dur": 0.464, + "args": { + "External id": 984723,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345939753644.294, "dur": 33.183, + "args": { + "External id": 984724,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939753646.709, "dur": 0.839, + "args": { + "External id": 984725,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939753649.628, "dur": 2.341, + "args": { + "External id": 984726,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939753653.454, "dur": 0.256, + "args": { + "External id": 984727,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939753655.425, "dur": 3.000, + "args": { + "External id": 984728,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939753661.562, "dur": 0.386, + "args": { + "External id": 984729,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939753663.435, "dur": 0.409, + "args": { + "External id": 984730,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939753665.276, "dur": 0.739, + "args": { + "External id": 984731,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939753669.245, "dur": 0.689, + "args": { + "External id": 984732,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939753671.893, "dur": 0.588, + "args": { + "External id": 984733,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939753689.978, "dur": 38.008, + "args": { + "External id": 984734,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345939753798.715, "dur": 487.666, + "args": { + "External id": 984735,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345939753833.645, "dur": 446.419, + "args": { + "External id": 984736,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17636, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345939753845.700, "dur": 426.458, + "args": { + "External id": 984737,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345939754319.229, "dur": 3.414, + "args": { + "External id": 984738,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17638, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.14)", "pid": 2338710, "tid": 2338710, + "ts": 6345939754433.675, "dur": 29919.716, + "args": { + "External id": 984739,"Record function id": 0, "Ev Idx": 17639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939754547.898, "dur": 7.695, + "args": { + "External id": 984740,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939754559.679, "dur": 1.406, + "args": { + "External id": 984741,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939754563.210, "dur": 3.683, + "args": { + "External id": 984742,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939754568.645, "dur": 0.765, + "args": { + "External id": 984743,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939754571.237, "dur": 0.853, + "args": { + "External id": 984744,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939754573.880, "dur": 1.250, + "args": { + "External id": 984745,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939754579.269, "dur": 0.960, + "args": { + "External id": 984746,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939754582.004, "dur": 2.543, + "args": { + "External id": 984747,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939754586.159, "dur": 0.942, + "args": { + "External id": 984748,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939754588.794, "dur": 0.819, + "args": { + "External id": 984749,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939754613.766, "dur": 29687.273, + "args": { + "External id": 984750,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939754632.550, "dur": 29657.812, + "args": { + "External id": 984751,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939754649.645, "dur": 20.230, + "args": { + "External id": 984752,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345939754674.264, "dur": 29570.353, + "args": { + "External id": 984753,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939754677.538, "dur": 29565.927, + "args": { + "External id": 984754,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939754683.764, "dur": 7.301, + "args": { + "External id": 984755,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939754693.074, "dur": 29545.771, + "args": { + "External id": 984756,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345939784517.385, "dur": 42.985, + "args": { + "External id": 984757,"Sequence number": 10552500, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17657 + } + }, + { + "ph": "s", "id": 419, "pid": 2338710, "tid": 2338710, "ts": 6345939784517.385, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345939784542.800, "dur": 12.080, + "args": { + "External id": 984758,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939784548.493, "dur": 6.138, + "args": { + "External id": 984759,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345939784637.309, "dur": 90.891, + "args": { + "External id": 984760,"Record function id": 0, "Ev Idx": 17660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345939784729.696, "dur": 1368.742, + "args": { + "External id": 984761,"Record function id": 0, "Ev Idx": 17661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345939784775.435, "dur": 1263.465, + "args": { + "External id": 984762,"Sequence number": 10552501, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17662 + } + }, + { + "ph": "s", "id": 418, "pid": 2338710, "tid": 2338710, "ts": 6345939784775.435, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345939784855.776, "dur": 56.041, + "args": { + "External id": 984763,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939784925.671, "dur": 176.475, + "args": { + "External id": 984764,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939785123.204, "dur": 54.709, + "args": { + "External id": 984765,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939785190.947, "dur": 37.023, + "args": { + "External id": 984766,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345939785261.158, "dur": 34.390, + "args": { + "External id": 984767,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345939785318.672, "dur": 22.748, + "args": { + "External id": 984768,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345939785369.109, "dur": 156.019, + "args": { + "External id": 984769,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345939785429.285, "dur": 13.673, + "args": { + "External id": 984770,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939785435.890, "dur": 6.061, + "args": { + "External id": 984771,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939785446.111, "dur": 4.542, + "args": { + "External id": 984772,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939785452.161, "dur": 1.524, + "args": { + "External id": 984773,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939785456.884, "dur": 6.026, + "args": { + "External id": 984774,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939785537.813, "dur": 63.494, + "args": { + "External id": 984775,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345939785637.134, "dur": 40.768, + "args": { + "External id": 984776,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939785690.500, "dur": 52.219, + "args": { + "External id": 984777,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939785751.536, "dur": 41.323, + "args": { + "External id": 984778,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345939785819.645, "dur": 30.958, + "args": { + "External id": 984779,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939785858.146, "dur": 42.631, + "args": { + "External id": 984780,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345939785921.850, "dur": 20.357, + "args": { + "External id": 984781,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17681 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.14)", "pid": 2338710, "tid": 2338710, + "ts": 6345939786182.026, "dur": 93.133, + "args": { + "External id": 984782,"Record function id": 0, "Ev Idx": 17682 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345939786366.418, "dur": 55.460, + "args": { + "External id": 984783,"Record function id": 0, "Ev Idx": 17683 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.15)", "pid": 2338710, "tid": 2338710, + "ts": 6345939786431.952, "dur": 32014.200, + "args": { + "External id": 984784,"Record function id": 0, "Ev Idx": 17684 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.15)", "pid": 2338710, "tid": 2338710, + "ts": 6345939786440.708, "dur": 1084.870, + "args": { + "External id": 984785,"Record function id": 0, "Ev Idx": 17685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939786539.252, "dur": 12.847, + "args": { + "External id": 984786,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345939786567.828, "dur": 41.601, + "args": { + "External id": 984787,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939786574.465, "dur": 2.731, + "args": { + "External id": 984788,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939786582.023, "dur": 0.591, + "args": { + "External id": 984789,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939786583.968, "dur": 0.665, + "args": { + "External id": 984790,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939786586.673, "dur": 0.505, + "args": { + "External id": 984791,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939786589.444, "dur": 0.554, + "args": { + "External id": 984792,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939786591.392, "dur": 0.529, + "args": { + "External id": 984793,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939786593.095, "dur": 4.332, + "args": { + "External id": 984794,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939786599.340, "dur": 0.546, + "args": { + "External id": 984795,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939786601.152, "dur": 0.561, + "args": { + "External id": 984796,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939786622.508, "dur": 65.670, + "args": { + "External id": 984797,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345939786730.566, "dur": 132.573, + "args": { + "External id": 984798,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939786743.648, "dur": 4.094, + "args": { + "External id": 984799,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345939786753.993, "dur": 12.315, + "args": { + "External id": 984800,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345939786759.357, "dur": 6.439, + "args": { + "External id": 984801,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939786763.755, "dur": 0.611, + "args": { + "External id": 984802,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345939786773.826, "dur": 29.654, + "args": { + "External id": 984803,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939786776.469, "dur": 1.855, + "args": { + "External id": 984804,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939786779.873, "dur": 0.753, + "args": { + "External id": 984805,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939786782.222, "dur": 0.613, + "args": { + "External id": 984806,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939786786.170, "dur": 2.659, + "args": { + "External id": 984807,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939786790.118, "dur": 0.517, + "args": { + "External id": 984808,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939786792.219, "dur": 0.358, + "args": { + "External id": 984809,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939786794.280, "dur": 0.270, + "args": { + "External id": 984810,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939786795.861, "dur": 0.410, + "args": { + "External id": 984811,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939786797.302, "dur": 1.896, + "args": { + "External id": 984812,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939786817.829, "dur": 36.429, + "args": { + "External id": 984813,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345939786925.535, "dur": 476.962, + "args": { + "External id": 984814,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345939786960.541, "dur": 435.505, + "args": { + "External id": 984815,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17715, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345939786973.002, "dur": 415.694, + "args": { + "External id": 984816,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345939787434.534, "dur": 3.196, + "args": { + "External id": 984817,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17717, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.15)", "pid": 2338710, "tid": 2338710, + "ts": 6345939787549.531, "dur": 30638.246, + "args": { + "External id": 984818,"Record function id": 0, "Ev Idx": 17718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939787664.151, "dur": 7.881, + "args": { + "External id": 984819,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939787675.752, "dur": 1.276, + "args": { + "External id": 984820,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939787679.015, "dur": 3.539, + "args": { + "External id": 984821,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939787684.393, "dur": 1.024, + "args": { + "External id": 984822,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939787686.871, "dur": 0.749, + "args": { + "External id": 984823,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939787689.138, "dur": 0.811, + "args": { + "External id": 984824,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939787694.959, "dur": 1.086, + "args": { + "External id": 984825,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939787697.979, "dur": 2.243, + "args": { + "External id": 984826,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939787701.816, "dur": 0.776, + "args": { + "External id": 984827,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939787704.281, "dur": 0.746, + "args": { + "External id": 984828,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939787729.116, "dur": 30401.128, + "args": { + "External id": 984829,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939787749.473, "dur": 30369.662, + "args": { + "External id": 984830,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939787767.893, "dur": 20.225, + "args": { + "External id": 984831,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345939787792.572, "dur": 30249.989, + "args": { + "External id": 984832,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939787795.578, "dur": 30245.267, + "args": { + "External id": 984833,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939787801.731, "dur": 6.088, + "args": { + "External id": 984834,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939787809.825, "dur": 30226.936, + "args": { + "External id": 984835,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345939818363.702, "dur": 47.478, + "args": { + "External id": 984836,"Sequence number": 10552502, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17736 + } + }, + { + "ph": "s", "id": 417, "pid": 2338710, "tid": 2338710, "ts": 6345939818363.702, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345939818391.627, "dur": 12.579, + "args": { + "External id": 984837,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939818397.458, "dur": 6.515, + "args": { + "External id": 984838,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345939818498.103, "dur": 86.764, + "args": { + "External id": 984839,"Record function id": 0, "Ev Idx": 17739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345939818587.352, "dur": 1309.722, + "args": { + "External id": 984840,"Record function id": 0, "Ev Idx": 17740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345939818631.879, "dur": 1248.917, + "args": { + "External id": 984841,"Sequence number": 10552503, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17741 + } + }, + { + "ph": "s", "id": 416, "pid": 2338710, "tid": 2338710, "ts": 6345939818631.879, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345939818714.131, "dur": 60.203, + "args": { + "External id": 984842,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939818789.755, "dur": 121.910, + "args": { + "External id": 984843,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939818929.243, "dur": 42.037, + "args": { + "External id": 984844,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939818980.509, "dur": 57.474, + "args": { + "External id": 984845,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345939819120.259, "dur": 35.962, + "args": { + "External id": 984846,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345939819181.207, "dur": 20.434, + "args": { + "External id": 984847,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345939819230.978, "dur": 161.646, + "args": { + "External id": 984848,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345939819290.990, "dur": 16.304, + "args": { + "External id": 984849,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939819298.674, "dur": 7.543, + "args": { + "External id": 984850,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939819311.784, "dur": 4.976, + "args": { + "External id": 984851,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939819318.517, "dur": 1.544, + "args": { + "External id": 984852,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939819324.848, "dur": 6.179, + "args": { + "External id": 984853,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939819406.813, "dur": 68.666, + "args": { + "External id": 984854,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345939819513.329, "dur": 35.360, + "args": { + "External id": 984855,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939819559.770, "dur": 51.205, + "args": { + "External id": 984856,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939819620.430, "dur": 40.484, + "args": { + "External id": 984857,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345939819687.233, "dur": 31.865, + "args": { + "External id": 984858,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939819725.740, "dur": 40.658, + "args": { + "External id": 984859,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345939819786.726, "dur": 20.521, + "args": { + "External id": 984860,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17760 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.15)", "pid": 2338710, "tid": 2338710, + "ts": 6345939819971.327, "dur": 155.430, + "args": { + "External id": 984861,"Record function id": 0, "Ev Idx": 17761 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345939820221.426, "dur": 56.632, + "args": { + "External id": 984862,"Record function id": 0, "Ev Idx": 17762 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.16)", "pid": 2338710, "tid": 2338710, + "ts": 6345939820289.035, "dur": 30923.226, + "args": { + "External id": 984863,"Record function id": 0, "Ev Idx": 17763 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.16)", "pid": 2338710, "tid": 2338710, + "ts": 6345939820298.488, "dur": 1109.623, + "args": { + "External id": 984864,"Record function id": 0, "Ev Idx": 17764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939820394.241, "dur": 12.264, + "args": { + "External id": 984865,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345939820424.320, "dur": 39.805, + "args": { + "External id": 984866,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939820430.523, "dur": 2.844, + "args": { + "External id": 984867,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939820437.910, "dur": 0.693, + "args": { + "External id": 984868,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939820440.125, "dur": 0.825, + "args": { + "External id": 984869,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939820442.542, "dur": 0.649, + "args": { + "External id": 984870,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939820446.046, "dur": 0.377, + "args": { + "External id": 984871,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939820447.654, "dur": 0.368, + "args": { + "External id": 984872,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939820449.499, "dur": 3.826, + "args": { + "External id": 984873,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939820455.025, "dur": 0.585, + "args": { + "External id": 984874,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939820456.901, "dur": 0.322, + "args": { + "External id": 984875,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939820476.552, "dur": 67.960, + "args": { + "External id": 984876,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345939820585.891, "dur": 146.875, + "args": { + "External id": 984877,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939820601.072, "dur": 4.869, + "args": { + "External id": 984878,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345939820614.699, "dur": 12.546, + "args": { + "External id": 984879,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345939820620.146, "dur": 6.579, + "args": { + "External id": 984880,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939820624.501, "dur": 0.605, + "args": { + "External id": 984881,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345939820635.314, "dur": 37.604, + "args": { + "External id": 984882,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939820637.981, "dur": 1.552, + "args": { + "External id": 984883,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939820646.992, "dur": 0.796, + "args": { + "External id": 984884,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939820650.589, "dur": 0.483, + "args": { + "External id": 984885,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939820654.672, "dur": 2.934, + "args": { + "External id": 984886,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939820658.643, "dur": 0.360, + "args": { + "External id": 984887,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939820660.679, "dur": 1.493, + "args": { + "External id": 984888,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939820663.330, "dur": 0.322, + "args": { + "External id": 984889,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939820665.196, "dur": 0.370, + "args": { + "External id": 984890,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939820667.912, "dur": 0.340, + "args": { + "External id": 984891,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939820685.973, "dur": 37.473, + "args": { + "External id": 984892,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345939820796.104, "dur": 487.254, + "args": { + "External id": 984893,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345939820832.716, "dur": 443.533, + "args": { + "External id": 984894,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17794, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345939820844.613, "dur": 424.245, + "args": { + "External id": 984895,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345939821315.910, "dur": 3.218, + "args": { + "External id": 984896,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17796, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.16)", "pid": 2338710, "tid": 2338710, + "ts": 6345939821433.297, "dur": 29471.922, + "args": { + "External id": 984897,"Record function id": 0, "Ev Idx": 17797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939821549.912, "dur": 7.896, + "args": { + "External id": 984898,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939821562.048, "dur": 1.032, + "args": { + "External id": 984899,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939821565.192, "dur": 3.560, + "args": { + "External id": 984900,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939821570.446, "dur": 1.062, + "args": { + "External id": 984901,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939821573.137, "dur": 1.317, + "args": { + "External id": 984902,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939821575.768, "dur": 0.852, + "args": { + "External id": 984903,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939821580.566, "dur": 1.048, + "args": { + "External id": 984904,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939821583.752, "dur": 2.580, + "args": { + "External id": 984905,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939821588.010, "dur": 0.898, + "args": { + "External id": 984906,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939821590.542, "dur": 0.970, + "args": { + "External id": 984907,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939821615.244, "dur": 29234.365, + "args": { + "External id": 984908,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939821635.176, "dur": 29203.680, + "args": { + "External id": 984909,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939821652.522, "dur": 20.951, + "args": { + "External id": 984910,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345939821678.274, "dur": 29118.418, + "args": { + "External id": 984911,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939821681.942, "dur": 29112.939, + "args": { + "External id": 984912,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939821689.135, "dur": 6.294, + "args": { + "External id": 984913,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939821697.322, "dur": 29093.868, + "args": { + "External id": 984914,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345939851135.536, "dur": 42.526, + "args": { + "External id": 984915,"Sequence number": 10552504, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17815 + } + }, + { + "ph": "s", "id": 415, "pid": 2338710, "tid": 2338710, "ts": 6345939851135.536, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345939851158.465, "dur": 12.933, + "args": { + "External id": 984916,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939851164.169, "dur": 6.790, + "args": { + "External id": 984917,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345939851267.519, "dur": 86.773, + "args": { + "External id": 984918,"Record function id": 0, "Ev Idx": 17818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345939851355.773, "dur": 1354.064, + "args": { + "External id": 984919,"Record function id": 0, "Ev Idx": 17819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345939851405.125, "dur": 1288.545, + "args": { + "External id": 984920,"Sequence number": 10552505, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17820 + } + }, + { + "ph": "s", "id": 414, "pid": 2338710, "tid": 2338710, "ts": 6345939851405.125, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345939851496.444, "dur": 63.700, + "args": { + "External id": 984921,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939851573.532, "dur": 122.588, + "args": { + "External id": 984922,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939851711.098, "dur": 44.679, + "args": { + "External id": 984923,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939851768.194, "dur": 34.137, + "args": { + "External id": 984924,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345939851838.790, "dur": 33.464, + "args": { + "External id": 984925,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345939851897.879, "dur": 21.118, + "args": { + "External id": 984926,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345939851944.335, "dur": 230.543, + "args": { + "External id": 984927,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345939852004.355, "dur": 38.180, + "args": { + "External id": 984928,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939852031.425, "dur": 8.626, + "args": { + "External id": 984929,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939852046.074, "dur": 5.056, + "args": { + "External id": 984930,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939852091.727, "dur": 3.616, + "args": { + "External id": 984931,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939852099.293, "dur": 6.885, + "args": { + "External id": 984932,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939852191.554, "dur": 69.446, + "args": { + "External id": 984933,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345939852302.569, "dur": 35.375, + "args": { + "External id": 984934,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939852352.185, "dur": 53.258, + "args": { + "External id": 984935,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939852414.611, "dur": 41.222, + "args": { + "External id": 984936,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345939852487.308, "dur": 30.951, + "args": { + "External id": 984937,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939852526.648, "dur": 42.835, + "args": { + "External id": 984938,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345939852592.043, "dur": 23.806, + "args": { + "External id": 984939,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17839 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.16)", "pid": 2338710, "tid": 2338710, + "ts": 6345939852784.297, "dur": 85.784, + "args": { + "External id": 984940,"Record function id": 0, "Ev Idx": 17840 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345939852958.809, "dur": 75.005, + "args": { + "External id": 984941,"Record function id": 0, "Ev Idx": 17841 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.17)", "pid": 2338710, "tid": 2338710, + "ts": 6345939853045.798, "dur": 31882.264, + "args": { + "External id": 984942,"Record function id": 0, "Ev Idx": 17842 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.17)", "pid": 2338710, "tid": 2338710, + "ts": 6345939853097.405, "dur": 1156.311, + "args": { + "External id": 984943,"Record function id": 0, "Ev Idx": 17843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939853194.071, "dur": 13.146, + "args": { + "External id": 984944,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345939853224.943, "dur": 42.662, + "args": { + "External id": 984945,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939853231.752, "dur": 2.688, + "args": { + "External id": 984946,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939853239.309, "dur": 0.476, + "args": { + "External id": 984947,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939853240.979, "dur": 0.554, + "args": { + "External id": 984948,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939853243.226, "dur": 0.612, + "args": { + "External id": 984949,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939853247.305, "dur": 0.607, + "args": { + "External id": 984950,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939853249.865, "dur": 0.682, + "args": { + "External id": 984951,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939853251.890, "dur": 4.095, + "args": { + "External id": 984952,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939853258.008, "dur": 0.437, + "args": { + "External id": 984953,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939853260.366, "dur": 0.458, + "args": { + "External id": 984954,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939853281.165, "dur": 68.141, + "args": { + "External id": 984955,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345939853391.144, "dur": 143.334, + "args": { + "External id": 984956,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939853407.080, "dur": 4.178, + "args": { + "External id": 984957,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345939853417.871, "dur": 11.982, + "args": { + "External id": 984958,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345939853423.218, "dur": 6.124, + "args": { + "External id": 984959,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939853427.249, "dur": 0.716, + "args": { + "External id": 984960,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345939853437.690, "dur": 34.509, + "args": { + "External id": 984961,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939853440.300, "dur": 2.339, + "args": { + "External id": 984962,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939853444.349, "dur": 0.518, + "args": { + "External id": 984963,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939853446.467, "dur": 0.530, + "args": { + "External id": 984964,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939853451.362, "dur": 2.857, + "args": { + "External id": 984965,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939853455.813, "dur": 0.499, + "args": { + "External id": 984966,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939853457.813, "dur": 0.368, + "args": { + "External id": 984967,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939853461.310, "dur": 0.522, + "args": { + "External id": 984968,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939853463.475, "dur": 0.341, + "args": { + "External id": 984969,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939853465.314, "dur": 1.932, + "args": { + "External id": 984970,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939853487.771, "dur": 37.361, + "args": { + "External id": 984971,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345939853596.408, "dur": 530.576, + "args": { + "External id": 984972,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345939853632.121, "dur": 487.738, + "args": { + "External id": 984973,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17873, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345939853644.532, "dur": 465.556, + "args": { + "External id": 984974,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345939854158.317, "dur": 3.169, + "args": { + "External id": 984975,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17875, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.17)", "pid": 2338710, "tid": 2338710, + "ts": 6345939854278.149, "dur": 30410.577, + "args": { + "External id": 984976,"Record function id": 0, "Ev Idx": 17876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939854398.069, "dur": 8.016, + "args": { + "External id": 984977,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939854410.764, "dur": 1.049, + "args": { + "External id": 984978,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939854413.836, "dur": 4.213, + "args": { + "External id": 984979,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939854419.765, "dur": 1.400, + "args": { + "External id": 984980,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939854422.809, "dur": 1.126, + "args": { + "External id": 984981,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939854425.340, "dur": 0.915, + "args": { + "External id": 984982,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939854430.622, "dur": 0.847, + "args": { + "External id": 984983,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939854433.281, "dur": 1.914, + "args": { + "External id": 984984,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939854436.655, "dur": 0.934, + "args": { + "External id": 984985,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939854439.469, "dur": 0.753, + "args": { + "External id": 984986,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939854465.060, "dur": 30169.730, + "args": { + "External id": 984987,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939854488.536, "dur": 30135.930, + "args": { + "External id": 984988,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939854505.142, "dur": 18.265, + "args": { + "External id": 984989,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345939854527.971, "dur": 30049.514, + "args": { + "External id": 984990,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939854531.009, "dur": 30044.596, + "args": { + "External id": 984991,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939854537.166, "dur": 6.517, + "args": { + "External id": 984992,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939854545.691, "dur": 30026.674, + "args": { + "External id": 984993,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345939884857.245, "dur": 40.691, + "args": { + "External id": 984994,"Sequence number": 10552506, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17894 + } + }, + { + "ph": "s", "id": 413, "pid": 2338710, "tid": 2338710, "ts": 6345939884857.245, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345939884881.460, "dur": 10.872, + "args": { + "External id": 984995,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939884886.945, "dur": 5.118, + "args": { + "External id": 984996,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345939884979.065, "dur": 138.338, + "args": { + "External id": 984997,"Record function id": 0, "Ev Idx": 17897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345939885120.742, "dur": 1320.766, + "args": { + "External id": 984998,"Record function id": 0, "Ev Idx": 17898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345939885172.161, "dur": 1252.090, + "args": { + "External id": 984999,"Sequence number": 10552507, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17899 + } + }, + { + "ph": "s", "id": 412, "pid": 2338710, "tid": 2338710, "ts": 6345939885172.161, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345939885259.749, "dur": 63.073, + "args": { + "External id": 985000,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939885340.214, "dur": 121.121, + "args": { + "External id": 985001,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939885477.720, "dur": 43.990, + "args": { + "External id": 985002,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939885530.310, "dur": 34.090, + "args": { + "External id": 985003,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345939885596.351, "dur": 33.620, + "args": { + "External id": 985004,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345939885654.171, "dur": 21.133, + "args": { + "External id": 985005,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345939885702.440, "dur": 159.371, + "args": { + "External id": 985006,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345939885763.070, "dur": 15.284, + "args": { + "External id": 985007,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939885770.490, "dur": 6.923, + "args": { + "External id": 985008,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939885782.629, "dur": 4.793, + "args": { + "External id": 985009,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939885789.051, "dur": 1.272, + "args": { + "External id": 985010,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939885793.267, "dur": 6.226, + "args": { + "External id": 985011,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939885874.890, "dur": 55.949, + "args": { + "External id": 985012,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345939885967.125, "dur": 34.808, + "args": { + "External id": 985013,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939886034.013, "dur": 94.945, + "args": { + "External id": 985014,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939886145.084, "dur": 44.603, + "args": { + "External id": 985015,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345939886220.289, "dur": 30.847, + "args": { + "External id": 985016,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939886260.009, "dur": 41.645, + "args": { + "External id": 985017,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345939886323.223, "dur": 23.094, + "args": { + "External id": 985018,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17918 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.17)", "pid": 2338710, "tid": 2338710, + "ts": 6345939886518.015, "dur": 92.798, + "args": { + "External id": 985019,"Record function id": 0, "Ev Idx": 17919 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345939886701.082, "dur": 55.063, + "args": { + "External id": 985020,"Record function id": 0, "Ev Idx": 17920 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.18)", "pid": 2338710, "tid": 2338710, + "ts": 6345939886765.734, "dur": 31843.570, + "args": { + "External id": 985021,"Record function id": 0, "Ev Idx": 17921 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.18)", "pid": 2338710, "tid": 2338710, + "ts": 6345939886774.974, "dur": 1095.724, + "args": { + "External id": 985022,"Record function id": 0, "Ev Idx": 17922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939886871.634, "dur": 10.298, + "args": { + "External id": 985023,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345939886897.806, "dur": 38.836, + "args": { + "External id": 985024,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939886903.988, "dur": 2.804, + "args": { + "External id": 985025,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939886911.515, "dur": 0.402, + "args": { + "External id": 985026,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939886913.068, "dur": 0.805, + "args": { + "External id": 985027,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939886915.524, "dur": 0.537, + "args": { + "External id": 985028,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939886918.339, "dur": 0.654, + "args": { + "External id": 985029,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939886920.446, "dur": 0.596, + "args": { + "External id": 985030,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939886922.337, "dur": 3.202, + "args": { + "External id": 985031,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939886927.015, "dur": 0.426, + "args": { + "External id": 985032,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939886928.550, "dur": 0.567, + "args": { + "External id": 985033,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939886949.555, "dur": 88.397, + "args": { + "External id": 985034,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345939887127.168, "dur": 154.040, + "args": { + "External id": 985035,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939887143.122, "dur": 7.727, + "args": { + "External id": 985036,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345939887157.753, "dur": 13.985, + "args": { + "External id": 985037,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345939887163.142, "dur": 8.113, + "args": { + "External id": 985038,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939887168.188, "dur": 0.883, + "args": { + "External id": 985039,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345939887180.634, "dur": 34.573, + "args": { + "External id": 985040,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939887183.217, "dur": 1.772, + "args": { + "External id": 985041,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939887186.767, "dur": 0.604, + "args": { + "External id": 985042,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939887188.542, "dur": 0.566, + "args": { + "External id": 985043,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939887192.913, "dur": 2.510, + "args": { + "External id": 985044,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939887196.895, "dur": 0.653, + "args": { + "External id": 985045,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939887199.075, "dur": 0.467, + "args": { + "External id": 985046,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939887202.426, "dur": 0.324, + "args": { + "External id": 985047,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939887206.921, "dur": 0.268, + "args": { + "External id": 985048,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939887208.562, "dur": 1.848, + "args": { + "External id": 985049,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939887230.558, "dur": 41.053, + "args": { + "External id": 985050,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345939887349.018, "dur": 412.414, + "args": { + "External id": 985051,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345939887385.245, "dur": 370.268, + "args": { + "External id": 985052,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17952, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345939887397.892, "dur": 351.253, + "args": { + "External id": 985053,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345939887790.827, "dur": 2.584, + "args": { + "External id": 985054,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17954, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.18)", "pid": 2338710, "tid": 2338710, + "ts": 6345939887893.937, "dur": 30459.806, + "args": { + "External id": 985055,"Record function id": 0, "Ev Idx": 17955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939888006.788, "dur": 30.503, + "args": { + "External id": 985056,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939888044.525, "dur": 1.168, + "args": { + "External id": 985057,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939888047.963, "dur": 3.335, + "args": { + "External id": 985058,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939888092.185, "dur": 1.631, + "args": { + "External id": 985059,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939888096.186, "dur": 2.074, + "args": { + "External id": 985060,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939888099.720, "dur": 1.271, + "args": { + "External id": 985061,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939888102.702, "dur": 0.875, + "args": { + "External id": 985062,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939888105.598, "dur": 2.297, + "args": { + "External id": 985063,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939888109.396, "dur": 0.732, + "args": { + "External id": 985064,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939888113.973, "dur": 0.819, + "args": { + "External id": 985065,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939888139.391, "dur": 30153.281, + "args": { + "External id": 985066,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939888158.285, "dur": 30123.737, + "args": { + "External id": 985067,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939888177.414, "dur": 19.720, + "args": { + "External id": 985068,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345939888201.385, "dur": 30033.853, + "args": { + "External id": 985069,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939888204.425, "dur": 30029.405, + "args": { + "External id": 985070,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939888211.198, "dur": 7.225, + "args": { + "External id": 985071,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939888220.290, "dur": 30009.284, + "args": { + "External id": 985072,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345939918533.876, "dur": 45.062, + "args": { + "External id": 985073,"Sequence number": 10552508, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17973 + } + }, + { + "ph": "s", "id": 411, "pid": 2338710, "tid": 2338710, "ts": 6345939918533.876, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345939918560.278, "dur": 12.109, + "args": { + "External id": 985074,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939918566.000, "dur": 6.111, + "args": { + "External id": 985075,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345939918659.871, "dur": 88.435, + "args": { + "External id": 985076,"Record function id": 0, "Ev Idx": 17976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345939918750.286, "dur": 1481.780, + "args": { + "External id": 985077,"Record function id": 0, "Ev Idx": 17977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345939918795.217, "dur": 1419.492, + "args": { + "External id": 985078,"Sequence number": 10552509, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17978 + } + }, + { + "ph": "s", "id": 410, "pid": 2338710, "tid": 2338710, "ts": 6345939918795.217, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345939918880.692, "dur": 56.783, + "args": { + "External id": 985079,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939918954.031, "dur": 184.524, + "args": { + "External id": 985080,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939919160.433, "dur": 52.094, + "args": { + "External id": 985081,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939919223.632, "dur": 34.734, + "args": { + "External id": 985082,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345939919296.347, "dur": 35.290, + "args": { + "External id": 985083,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345939919357.987, "dur": 21.720, + "args": { + "External id": 985084,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345939919407.292, "dur": 226.694, + "args": { + "External id": 985085,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345939919469.212, "dur": 16.046, + "args": { + "External id": 985086,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939919476.406, "dur": 7.593, + "args": { + "External id": 985087,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939919489.732, "dur": 4.944, + "args": { + "External id": 985088,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939919516.741, "dur": 1.250, + "args": { + "External id": 985089,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939919555.659, "dur": 6.498, + "args": { + "External id": 985090,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939919648.771, "dur": 65.172, + "args": { + "External id": 985091,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345939919752.341, "dur": 37.947, + "args": { + "External id": 985092,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939919801.491, "dur": 54.861, + "args": { + "External id": 985093,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939919866.557, "dur": 41.532, + "args": { + "External id": 985094,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345939919934.220, "dur": 35.990, + "args": { + "External id": 985095,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939919976.942, "dur": 64.534, + "args": { + "External id": 985096,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345939920109.114, "dur": 26.837, + "args": { + "External id": 985097,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17997 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.18)", "pid": 2338710, "tid": 2338710, + "ts": 6345939920308.587, "dur": 92.803, + "args": { + "External id": 985098,"Record function id": 0, "Ev Idx": 17998 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345939920491.724, "dur": 56.297, + "args": { + "External id": 985099,"Record function id": 0, "Ev Idx": 17999 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.19)", "pid": 2338710, "tid": 2338710, + "ts": 6345939920558.811, "dur": 31482.990, + "args": { + "External id": 985100,"Record function id": 0, "Ev Idx": 18000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.19)", "pid": 2338710, "tid": 2338710, + "ts": 6345939920568.707, "dur": 1088.333, + "args": { + "External id": 985101,"Record function id": 0, "Ev Idx": 18001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939920663.291, "dur": 11.176, + "args": { + "External id": 985102,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345939920691.961, "dur": 41.903, + "args": { + "External id": 985103,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939920698.220, "dur": 2.824, + "args": { + "External id": 985104,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939920705.878, "dur": 0.389, + "args": { + "External id": 985105,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939920707.946, "dur": 0.455, + "args": { + "External id": 985106,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939920709.941, "dur": 0.448, + "args": { + "External id": 985107,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939920713.677, "dur": 0.523, + "args": { + "External id": 985108,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939920715.868, "dur": 0.574, + "args": { + "External id": 985109,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939920717.891, "dur": 4.820, + "args": { + "External id": 985110,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939920724.745, "dur": 0.556, + "args": { + "External id": 985111,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939920726.754, "dur": 0.464, + "args": { + "External id": 985112,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939920747.233, "dur": 60.130, + "args": { + "External id": 985113,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345939920846.718, "dur": 140.258, + "args": { + "External id": 985114,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 18014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939920860.645, "dur": 3.947, + "args": { + "External id": 985115,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345939920870.491, "dur": 12.644, + "args": { + "External id": 985116,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345939920876.018, "dur": 6.597, + "args": { + "External id": 985117,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 18017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939920880.531, "dur": 0.555, + "args": { + "External id": 985118,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345939920890.772, "dur": 36.784, + "args": { + "External id": 985119,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939920893.284, "dur": 2.209, + "args": { + "External id": 985120,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939920897.318, "dur": 0.380, + "args": { + "External id": 985121,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939920903.244, "dur": 0.353, + "args": { + "External id": 985122,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939920907.366, "dur": 3.453, + "args": { + "External id": 985123,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939920912.579, "dur": 0.396, + "args": { + "External id": 985124,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939920914.481, "dur": 1.938, + "args": { + "External id": 985125,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939920917.627, "dur": 0.336, + "args": { + "External id": 985126,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939920919.286, "dur": 0.408, + "args": { + "External id": 985127,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939920922.769, "dur": 0.372, + "args": { + "External id": 985128,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939920941.726, "dur": 36.469, + "args": { + "External id": 985129,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345939921117.698, "dur": 425.006, + "args": { + "External id": 985130,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 18030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345939921154.831, "dur": 382.005, + "args": { + "External id": 985131,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 18031, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345939921168.437, "dur": 362.096, + "args": { + "External id": 985132,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 18032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345939921570.536, "dur": 2.734, + "args": { + "External id": 985133,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 18033, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.19)", "pid": 2338710, "tid": 2338710, + "ts": 6345939921680.665, "dur": 30080.992, + "args": { + "External id": 985134,"Record function id": 0, "Ev Idx": 18034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939921798.826, "dur": 7.612, + "args": { + "External id": 985135,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 18035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939921815.928, "dur": 1.650, + "args": { + "External id": 985136,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939921820.009, "dur": 4.395, + "args": { + "External id": 985137,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939921828.648, "dur": 1.096, + "args": { + "External id": 985138,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939921831.414, "dur": 0.934, + "args": { + "External id": 985139,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939921833.828, "dur": 0.904, + "args": { + "External id": 985140,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939921836.579, "dur": 1.041, + "args": { + "External id": 985141,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939921840.995, "dur": 2.166, + "args": { + "External id": 985142,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939921845.010, "dur": 0.934, + "args": { + "External id": 985143,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939921847.496, "dur": 0.676, + "args": { + "External id": 985144,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939921871.191, "dur": 29837.012, + "args": { + "External id": 985145,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939921889.068, "dur": 29809.204, + "args": { + "External id": 985146,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939921907.708, "dur": 20.064, + "args": { + "External id": 985147,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345939921934.305, "dur": 29720.587, + "args": { + "External id": 985148,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 18048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939921937.110, "dur": 29716.576, + "args": { + "External id": 985149,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 18049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939921943.503, "dur": 5.675, + "args": { + "External id": 985150,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939921951.068, "dur": 29698.257, + "args": { + "External id": 985151,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 18051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345939951947.869, "dur": 41.932, + "args": { + "External id": 985152,"Sequence number": 10552510, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 18052 + } + }, + { + "ph": "s", "id": 409, "pid": 2338710, "tid": 2338710, "ts": 6345939951947.869, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345939951971.262, "dur": 11.893, + "args": { + "External id": 985153,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 18053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939951976.682, "dur": 6.174, + "args": { + "External id": 985154,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345939952125.422, "dur": 92.157, + "args": { + "External id": 985155,"Record function id": 0, "Ev Idx": 18055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345939952219.633, "dur": 1336.388, + "args": { + "External id": 985156,"Record function id": 0, "Ev Idx": 18056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345939952266.266, "dur": 1273.473, + "args": { + "External id": 985157,"Sequence number": 10552511, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 18057 + } + }, + { + "ph": "s", "id": 408, "pid": 2338710, "tid": 2338710, "ts": 6345939952266.266, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345939952354.213, "dur": 66.790, + "args": { + "External id": 985158,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939952438.591, "dur": 121.043, + "args": { + "External id": 985159,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939952576.020, "dur": 44.951, + "args": { + "External id": 985160,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939952631.517, "dur": 34.843, + "args": { + "External id": 985161,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345939952698.532, "dur": 33.124, + "args": { + "External id": 985162,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345939952756.509, "dur": 21.166, + "args": { + "External id": 985163,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345939952803.911, "dur": 161.230, + "args": { + "External id": 985164,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345939952866.165, "dur": 15.505, + "args": { + "External id": 985165,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939952873.609, "dur": 7.194, + "args": { + "External id": 985166,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939952885.926, "dur": 5.230, + "args": { + "External id": 985167,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939952892.908, "dur": 1.128, + "args": { + "External id": 985168,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939952897.067, "dur": 5.935, + "args": { + "External id": 985169,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939952978.334, "dur": 124.252, + "args": { + "External id": 985170,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345939953144.995, "dur": 37.238, + "args": { + "External id": 985171,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939953193.444, "dur": 54.700, + "args": { + "External id": 985172,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939953258.569, "dur": 42.718, + "args": { + "External id": 985173,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345939953328.137, "dur": 32.922, + "args": { + "External id": 985174,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 18074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939953369.593, "dur": 45.147, + "args": { + "External id": 985175,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 18075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345939953437.750, "dur": 22.894, + "args": { + "External id": 985176,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 18076 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.19)", "pid": 2338710, "tid": 2338710, + "ts": 6345939953632.252, "dur": 88.900, + "args": { + "External id": 985177,"Record function id": 0, "Ev Idx": 18077 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345939953808.727, "dur": 56.163, + "args": { + "External id": 985178,"Record function id": 0, "Ev Idx": 18078 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.20)", "pid": 2338710, "tid": 2338710, + "ts": 6345939953875.545, "dur": 31417.730, + "args": { + "External id": 985179,"Record function id": 0, "Ev Idx": 18079 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.20)", "pid": 2338710, "tid": 2338710, + "ts": 6345939953885.200, "dur": 1095.919, + "args": { + "External id": 985180,"Record function id": 0, "Ev Idx": 18080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939953981.772, "dur": 10.643, + "args": { + "External id": 985181,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345939954029.534, "dur": 81.296, + "args": { + "External id": 985182,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939954036.987, "dur": 2.767, + "args": { + "External id": 985183,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939954044.671, "dur": 0.560, + "args": { + "External id": 985184,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939954046.683, "dur": 0.436, + "args": { + "External id": 985185,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939954048.690, "dur": 0.326, + "args": { + "External id": 985186,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939954051.114, "dur": 0.442, + "args": { + "External id": 985187,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939954053.051, "dur": 0.392, + "args": { + "External id": 985188,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939954054.870, "dur": 41.934, + "args": { + "External id": 985189,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939954101.240, "dur": 0.509, + "args": { + "External id": 985190,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939954103.175, "dur": 0.534, + "args": { + "External id": 985191,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939954125.708, "dur": 67.806, + "args": { + "External id": 985192,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345939954236.734, "dur": 143.086, + "args": { + "External id": 985193,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 18093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939954251.847, "dur": 6.233, + "args": { + "External id": 985194,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345939954264.620, "dur": 12.896, + "args": { + "External id": 985195,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345939954270.345, "dur": 6.646, + "args": { + "External id": 985196,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 18096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939954274.521, "dur": 0.822, + "args": { + "External id": 985197,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345939954286.567, "dur": 32.717, + "args": { + "External id": 985198,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939954289.000, "dur": 2.645, + "args": { + "External id": 985199,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939954293.514, "dur": 0.539, + "args": { + "External id": 985200,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939954295.870, "dur": 0.430, + "args": { + "External id": 985201,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939954300.138, "dur": 2.311, + "args": { + "External id": 985202,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939954303.801, "dur": 0.478, + "args": { + "External id": 985203,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939954306.299, "dur": 0.257, + "args": { + "External id": 985204,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939954309.076, "dur": 0.506, + "args": { + "External id": 985205,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939954311.169, "dur": 0.470, + "args": { + "External id": 985206,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939954312.863, "dur": 1.975, + "args": { + "External id": 985207,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939954333.362, "dur": 37.540, + "args": { + "External id": 985208,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345939954450.823, "dur": 417.431, + "args": { + "External id": 985209,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 18109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345939954487.329, "dur": 375.252, + "args": { + "External id": 985210,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 18110, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345939954500.315, "dur": 356.082, + "args": { + "External id": 985211,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 18111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345939954897.500, "dur": 2.713, + "args": { + "External id": 985212,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 18112, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.20)", "pid": 2338710, "tid": 2338710, + "ts": 6345939955006.102, "dur": 29999.643, + "args": { + "External id": 985213,"Record function id": 0, "Ev Idx": 18113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939955195.855, "dur": 8.607, + "args": { + "External id": 985214,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 18114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939955208.983, "dur": 0.910, + "args": { + "External id": 985215,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939955212.079, "dur": 3.333, + "args": { + "External id": 985216,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939955217.336, "dur": 0.995, + "args": { + "External id": 985217,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939955219.940, "dur": 0.846, + "args": { + "External id": 985218,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939955222.297, "dur": 1.114, + "args": { + "External id": 985219,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939955225.126, "dur": 0.884, + "args": { + "External id": 985220,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939955228.596, "dur": 2.460, + "args": { + "External id": 985221,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939955232.895, "dur": 0.893, + "args": { + "External id": 985222,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939955237.987, "dur": 0.887, + "args": { + "External id": 985223,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939955261.290, "dur": 29686.623, + "args": { + "External id": 985224,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939955280.602, "dur": 29657.086, + "args": { + "External id": 985225,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939955300.410, "dur": 19.975, + "args": { + "External id": 985226,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345939955324.672, "dur": 29566.593, + "args": { + "External id": 985227,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 18127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939955328.795, "dur": 29560.762, + "args": { + "External id": 985228,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 18128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939955335.495, "dur": 6.810, + "args": { + "External id": 985229,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939955344.154, "dur": 29541.804, + "args": { + "External id": 985230,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 18130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345939985221.331, "dur": 40.873, + "args": { + "External id": 985231,"Sequence number": 10552512, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 18131 + } + }, + { + "ph": "s", "id": 407, "pid": 2338710, "tid": 2338710, "ts": 6345939985221.331, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345939985243.910, "dur": 12.432, + "args": { + "External id": 985232,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 18132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939985249.545, "dur": 6.364, + "args": { + "External id": 985233,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345939985342.018, "dur": 88.400, + "args": { + "External id": 985234,"Record function id": 0, "Ev Idx": 18134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345939985432.700, "dur": 1344.127, + "args": { + "External id": 985235,"Record function id": 0, "Ev Idx": 18135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345939985476.167, "dur": 1284.092, + "args": { + "External id": 985236,"Sequence number": 10552513, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 18136 + } + }, + { + "ph": "s", "id": 406, "pid": 2338710, "tid": 2338710, "ts": 6345939985476.167, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345939985560.305, "dur": 61.108, + "args": { + "External id": 985237,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939985638.471, "dur": 122.735, + "args": { + "External id": 985238,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939985777.368, "dur": 46.379, + "args": { + "External id": 985239,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939985833.842, "dur": 35.591, + "args": { + "External id": 985240,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345939985905.795, "dur": 32.309, + "args": { + "External id": 985241,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345939985962.277, "dur": 22.769, + "args": { + "External id": 985242,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345939986034.233, "dur": 207.196, + "args": { + "External id": 985243,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345939986138.995, "dur": 16.635, + "args": { + "External id": 985244,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939986146.525, "dur": 7.773, + "args": { + "External id": 985245,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939986158.687, "dur": 5.185, + "args": { + "External id": 985246,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939986165.544, "dur": 1.228, + "args": { + "External id": 985247,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939986169.847, "dur": 6.277, + "args": { + "External id": 985248,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939986254.678, "dur": 69.933, + "args": { + "External id": 985249,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345939986362.792, "dur": 38.265, + "args": { + "External id": 985250,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939986411.698, "dur": 52.829, + "args": { + "External id": 985251,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939986476.050, "dur": 41.448, + "args": { + "External id": 985252,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345939986542.394, "dur": 33.188, + "args": { + "External id": 985253,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 18153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345939986584.062, "dur": 50.657, + "args": { + "External id": 985254,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 18154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345939986657.330, "dur": 22.988, + "args": { + "External id": 985255,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 18155 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.20)", "pid": 2338710, "tid": 2338710, + "ts": 6345939986851.411, "dur": 85.653, + "args": { + "External id": 985256,"Record function id": 0, "Ev Idx": 18156 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345939987050.327, "dur": 102.481, + "args": { + "External id": 985257,"Record function id": 0, "Ev Idx": 18157 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.21)", "pid": 2338710, "tid": 2338710, + "ts": 6345939987165.381, "dur": 32037.661, + "args": { + "External id": 985258,"Record function id": 0, "Ev Idx": 18158 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.21)", "pid": 2338710, "tid": 2338710, + "ts": 6345939987175.066, "dur": 1096.982, + "args": { + "External id": 985259,"Record function id": 0, "Ev Idx": 18159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939987273.131, "dur": 11.125, + "args": { + "External id": 985260,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345939987300.884, "dur": 41.522, + "args": { + "External id": 985261,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939987307.208, "dur": 2.779, + "args": { + "External id": 985262,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939987314.653, "dur": 0.495, + "args": { + "External id": 985263,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939987316.646, "dur": 0.537, + "args": { + "External id": 985264,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939987319.092, "dur": 0.578, + "args": { + "External id": 985265,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939987322.994, "dur": 0.466, + "args": { + "External id": 985266,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939987325.063, "dur": 0.541, + "args": { + "External id": 985267,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939987327.271, "dur": 4.002, + "args": { + "External id": 985268,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939987332.941, "dur": 0.627, + "args": { + "External id": 985269,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939987334.874, "dur": 0.507, + "args": { + "External id": 985270,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939987356.201, "dur": 67.479, + "args": { + "External id": 985271,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345939987463.042, "dur": 141.527, + "args": { + "External id": 985272,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 18172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939987476.821, "dur": 4.398, + "args": { + "External id": 985273,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345939987487.672, "dur": 12.309, + "args": { + "External id": 985274,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345939987493.323, "dur": 6.179, + "args": { + "External id": 985275,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 18175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939987497.434, "dur": 0.565, + "args": { + "External id": 985276,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345939987507.942, "dur": 35.528, + "args": { + "External id": 985277,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939987510.441, "dur": 2.352, + "args": { + "External id": 985278,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939987514.840, "dur": 0.520, + "args": { + "External id": 985279,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939987519.762, "dur": 0.566, + "args": { + "External id": 985280,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939987523.292, "dur": 2.676, + "args": { + "External id": 985281,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939987527.733, "dur": 0.395, + "args": { + "External id": 985282,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939987529.804, "dur": 1.484, + "args": { + "External id": 985283,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939987532.425, "dur": 0.488, + "args": { + "External id": 985284,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939987534.615, "dur": 0.373, + "args": { + "External id": 985285,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939987538.649, "dur": 0.410, + "args": { + "External id": 985286,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939987557.640, "dur": 38.269, + "args": { + "External id": 985287,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345939987668.172, "dur": 480.171, + "args": { + "External id": 985288,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 18188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345939987703.411, "dur": 437.822, + "args": { + "External id": 985289,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 18189, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345939987715.398, "dur": 418.451, + "args": { + "External id": 985290,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 18190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345939988182.187, "dur": 2.841, + "args": { + "External id": 985291,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 18191, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.21)", "pid": 2338710, "tid": 2338710, + "ts": 6345939988296.326, "dur": 30605.341, + "args": { + "External id": 985292,"Record function id": 0, "Ev Idx": 18192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939988416.795, "dur": 7.631, + "args": { + "External id": 985293,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 18193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939988428.317, "dur": 1.303, + "args": { + "External id": 985294,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939988431.573, "dur": 3.965, + "args": { + "External id": 985295,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939988437.686, "dur": 1.008, + "args": { + "External id": 985296,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939988440.282, "dur": 0.700, + "args": { + "External id": 985297,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939988442.558, "dur": 0.925, + "args": { + "External id": 985298,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939988445.223, "dur": 0.822, + "args": { + "External id": 985299,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939988447.966, "dur": 1.975, + "args": { + "External id": 985300,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939988451.609, "dur": 0.744, + "args": { + "External id": 985301,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345939988456.155, "dur": 0.957, + "args": { + "External id": 985302,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939988479.216, "dur": 30366.281, + "args": { + "External id": 985303,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939988497.041, "dur": 30337.518, + "args": { + "External id": 985304,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345939988516.254, "dur": 19.551, + "args": { + "External id": 985305,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345939988540.217, "dur": 30245.298, + "args": { + "External id": 985306,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 18206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345939988543.428, "dur": 30241.286, + "args": { + "External id": 985307,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 18207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345939988550.013, "dur": 6.935, + "args": { + "External id": 985308,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345939988559.019, "dur": 30220.541, + "args": { + "External id": 985309,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 18209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345940019130.597, "dur": 41.278, + "args": { + "External id": 985310,"Sequence number": 10552514, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 18210 + } + }, + { + "ph": "s", "id": 405, "pid": 2338710, "tid": 2338710, "ts": 6345940019130.597, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345940019152.957, "dur": 13.469, + "args": { + "External id": 985311,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 18211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940019158.874, "dur": 7.162, + "args": { + "External id": 985312,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345940019251.640, "dur": 91.037, + "args": { + "External id": 985313,"Record function id": 0, "Ev Idx": 18213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345940019344.100, "dur": 1365.715, + "args": { + "External id": 985314,"Record function id": 0, "Ev Idx": 18214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345940019391.710, "dur": 1302.273, + "args": { + "External id": 985315,"Sequence number": 10552515, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 18215 + } + }, + { + "ph": "s", "id": 404, "pid": 2338710, "tid": 2338710, "ts": 6345940019391.710, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345940019479.462, "dur": 62.007, + "args": { + "External id": 985316,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940019558.465, "dur": 124.416, + "args": { + "External id": 985317,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940019699.776, "dur": 47.528, + "args": { + "External id": 985318,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940019757.340, "dur": 36.048, + "args": { + "External id": 985319,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345940019823.795, "dur": 32.900, + "args": { + "External id": 985320,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345940019891.741, "dur": 21.968, + "args": { + "External id": 985321,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345940019942.523, "dur": 233.132, + "args": { + "External id": 985322,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940020002.781, "dur": 36.878, + "args": { + "External id": 985323,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940020030.181, "dur": 8.235, + "args": { + "External id": 985324,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940020044.541, "dur": 5.383, + "args": { + "External id": 985325,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940020051.495, "dur": 40.197, + "args": { + "External id": 985326,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940020097.301, "dur": 8.079, + "args": { + "External id": 985327,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940020188.786, "dur": 71.876, + "args": { + "External id": 985328,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345940020300.381, "dur": 39.000, + "args": { + "External id": 985329,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940020351.481, "dur": 50.602, + "args": { + "External id": 985330,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940020410.846, "dur": 42.171, + "args": { + "External id": 985331,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345940020479.001, "dur": 32.392, + "args": { + "External id": 985332,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 18232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940020519.797, "dur": 45.803, + "args": { + "External id": 985333,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 18233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345940020590.049, "dur": 24.600, + "args": { + "External id": 985334,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 18234 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.21)", "pid": 2338710, "tid": 2338710, + "ts": 6345940020787.221, "dur": 92.795, + "args": { + "External id": 985335,"Record function id": 0, "Ev Idx": 18235 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345940020970.019, "dur": 80.437, + "args": { + "External id": 985336,"Record function id": 0, "Ev Idx": 18236 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.22)", "pid": 2338710, "tid": 2338710, + "ts": 6345940021106.219, "dur": 31519.221, + "args": { + "External id": 985337,"Record function id": 0, "Ev Idx": 18237 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.22)", "pid": 2338710, "tid": 2338710, + "ts": 6345940021117.021, "dur": 1092.101, + "args": { + "External id": 985338,"Record function id": 0, "Ev Idx": 18238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940021218.772, "dur": 12.704, + "args": { + "External id": 985339,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345940021247.719, "dur": 41.728, + "args": { + "External id": 985340,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940021254.018, "dur": 2.552, + "args": { + "External id": 985341,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940021261.508, "dur": 0.723, + "args": { + "External id": 985342,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940021263.780, "dur": 0.672, + "args": { + "External id": 985343,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940021266.207, "dur": 0.801, + "args": { + "External id": 985344,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940021269.606, "dur": 0.457, + "args": { + "External id": 985345,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940021271.518, "dur": 0.608, + "args": { + "External id": 985346,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940021273.511, "dur": 4.433, + "args": { + "External id": 985347,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940021279.857, "dur": 0.530, + "args": { + "External id": 985348,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940021281.936, "dur": 0.541, + "args": { + "External id": 985349,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940021303.439, "dur": 66.939, + "args": { + "External id": 985350,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345940021410.694, "dur": 137.867, + "args": { + "External id": 985351,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 18251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940021424.704, "dur": 4.765, + "args": { + "External id": 985352,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345940021435.490, "dur": 12.512, + "args": { + "External id": 985353,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940021440.997, "dur": 6.500, + "args": { + "External id": 985354,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 18254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940021445.382, "dur": 0.555, + "args": { + "External id": 985355,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345940021455.799, "dur": 33.639, + "args": { + "External id": 985356,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940021457.897, "dur": 2.471, + "args": { + "External id": 985357,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940021462.240, "dur": 0.812, + "args": { + "External id": 985358,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940021464.716, "dur": 0.587, + "args": { + "External id": 985359,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940021468.926, "dur": 3.086, + "args": { + "External id": 985360,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940021473.268, "dur": 0.457, + "args": { + "External id": 985361,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940021475.166, "dur": 0.358, + "args": { + "External id": 985362,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940021478.612, "dur": 0.523, + "args": { + "External id": 985363,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940021480.810, "dur": 0.472, + "args": { + "External id": 985364,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940021482.320, "dur": 2.832, + "args": { + "External id": 985365,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940021502.627, "dur": 36.932, + "args": { + "External id": 985366,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345940021617.085, "dur": 425.189, + "args": { + "External id": 985367,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 18267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345940021653.067, "dur": 382.596, + "args": { + "External id": 985368,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 18268, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345940021664.981, "dur": 340.815, + "args": { + "External id": 985369,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 18269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345940022112.241, "dur": 3.844, + "args": { + "External id": 985370,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 18270, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.22)", "pid": 2338710, "tid": 2338710, + "ts": 6345940022233.928, "dur": 30161.075, + "args": { + "External id": 985371,"Record function id": 0, "Ev Idx": 18271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940022362.048, "dur": 7.861, + "args": { + "External id": 985372,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 18272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940022379.054, "dur": 1.319, + "args": { + "External id": 985373,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940022382.390, "dur": 3.397, + "args": { + "External id": 985374,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940022390.069, "dur": 0.919, + "args": { + "External id": 985375,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940022392.665, "dur": 1.105, + "args": { + "External id": 985376,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940022395.438, "dur": 0.779, + "args": { + "External id": 985377,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940022398.252, "dur": 0.991, + "args": { + "External id": 985378,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940022403.122, "dur": 2.355, + "args": { + "External id": 985379,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940022407.200, "dur": 0.839, + "args": { + "External id": 985380,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940022409.436, "dur": 0.694, + "args": { + "External id": 985381,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345940022433.145, "dur": 29909.324, + "args": { + "External id": 985382,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345940022450.931, "dur": 29881.298, + "args": { + "External id": 985383,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940022468.752, "dur": 20.236, + "args": { + "External id": 985384,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345940022496.463, "dur": 29792.879, + "args": { + "External id": 985385,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 18285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345940022499.748, "dur": 29788.014, + "args": { + "External id": 985386,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 18286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940022505.491, "dur": 6.728, + "args": { + "External id": 985387,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940022514.177, "dur": 29769.297, + "args": { + "External id": 985388,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 18288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345940052556.419, "dur": 39.448, + "args": { + "External id": 985389,"Sequence number": 10552516, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 18289 + } + }, + { + "ph": "s", "id": 403, "pid": 2338710, "tid": 2338710, "ts": 6345940052556.419, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345940052578.640, "dur": 12.048, + "args": { + "External id": 985390,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 18290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940052584.367, "dur": 6.078, + "args": { + "External id": 985391,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345940052671.925, "dur": 86.482, + "args": { + "External id": 985392,"Record function id": 0, "Ev Idx": 18292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345940052760.029, "dur": 1373.372, + "args": { + "External id": 985393,"Record function id": 0, "Ev Idx": 18293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345940052803.692, "dur": 1311.130, + "args": { + "External id": 985394,"Sequence number": 10552517, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 18294 + } + }, + { + "ph": "s", "id": 402, "pid": 2338710, "tid": 2338710, "ts": 6345940052803.692, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345940052889.998, "dur": 60.722, + "args": { + "External id": 985395,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940052965.608, "dur": 180.659, + "args": { + "External id": 985396,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940053167.559, "dur": 50.848, + "args": { + "External id": 985397,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940053228.810, "dur": 36.385, + "args": { + "External id": 985398,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345940053301.125, "dur": 35.312, + "args": { + "External id": 985399,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345940053362.896, "dur": 22.614, + "args": { + "External id": 985400,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345940053413.912, "dur": 159.463, + "args": { + "External id": 985401,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940053474.222, "dur": 14.678, + "args": { + "External id": 985402,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940053481.843, "dur": 6.029, + "args": { + "External id": 985403,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940053491.890, "dur": 5.247, + "args": { + "External id": 985404,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940053498.663, "dur": 1.518, + "args": { + "External id": 985405,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940053503.503, "dur": 6.784, + "args": { + "External id": 985406,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940053585.425, "dur": 56.129, + "args": { + "External id": 985407,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345940053677.070, "dur": 36.769, + "args": { + "External id": 985408,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940053725.147, "dur": 49.110, + "args": { + "External id": 985409,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940053784.648, "dur": 39.531, + "args": { + "External id": 985410,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345940053849.114, "dur": 32.997, + "args": { + "External id": 985411,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 18311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940053889.874, "dur": 42.495, + "args": { + "External id": 985412,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 18312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345940053954.927, "dur": 21.019, + "args": { + "External id": 985413,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 18313 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.22)", "pid": 2338710, "tid": 2338710, + "ts": 6345940054212.606, "dur": 93.683, + "args": { + "External id": 985414,"Record function id": 0, "Ev Idx": 18314 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345940054398.274, "dur": 55.348, + "args": { + "External id": 985415,"Record function id": 0, "Ev Idx": 18315 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.23)", "pid": 2338710, "tid": 2338710, + "ts": 6345940054463.422, "dur": 31097.846, + "args": { + "External id": 985416,"Record function id": 0, "Ev Idx": 18316 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.23)", "pid": 2338710, "tid": 2338710, + "ts": 6345940054472.696, "dur": 1098.580, + "args": { + "External id": 985417,"Record function id": 0, "Ev Idx": 18317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940054570.806, "dur": 12.000, + "args": { + "External id": 985418,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345940054598.085, "dur": 39.415, + "args": { + "External id": 985419,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940054604.552, "dur": 2.942, + "args": { + "External id": 985420,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940054612.171, "dur": 0.551, + "args": { + "External id": 985421,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940054614.254, "dur": 0.561, + "args": { + "External id": 985422,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940054616.520, "dur": 0.540, + "args": { + "External id": 985423,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940054619.573, "dur": 0.512, + "args": { + "External id": 985424,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940054621.560, "dur": 0.473, + "args": { + "External id": 985425,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940054623.282, "dur": 3.933, + "args": { + "External id": 985426,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940054628.687, "dur": 0.335, + "args": { + "External id": 985427,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940054630.253, "dur": 0.340, + "args": { + "External id": 985428,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940054655.247, "dur": 67.558, + "args": { + "External id": 985429,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345940054764.380, "dur": 140.164, + "args": { + "External id": 985430,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 18330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940054777.714, "dur": 5.906, + "args": { + "External id": 985431,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345940054789.746, "dur": 12.353, + "args": { + "External id": 985432,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940054794.950, "dur": 6.633, + "args": { + "External id": 985433,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 18333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940054799.293, "dur": 0.732, + "args": { + "External id": 985434,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345940054810.681, "dur": 32.293, + "args": { + "External id": 985435,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940054813.281, "dur": 0.654, + "args": { + "External id": 985436,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940054816.344, "dur": 2.044, + "args": { + "External id": 985437,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940054819.881, "dur": 0.673, + "args": { + "External id": 985438,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940054822.084, "dur": 3.065, + "args": { + "External id": 985439,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940054827.825, "dur": 0.360, + "args": { + "External id": 985440,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940054830.112, "dur": 0.289, + "args": { + "External id": 985441,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940054831.759, "dur": 0.761, + "args": { + "External id": 985442,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940054836.441, "dur": 0.468, + "args": { + "External id": 985443,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940054838.071, "dur": 0.270, + "args": { + "External id": 985444,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940054857.406, "dur": 38.018, + "args": { + "External id": 985445,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345940054967.362, "dur": 482.955, + "args": { + "External id": 985446,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 18346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345940055003.560, "dur": 440.053, + "args": { + "External id": 985447,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 18347, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345940055036.673, "dur": 399.925, + "args": { + "External id": 985448,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 18348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345940055483.121, "dur": 2.688, + "args": { + "External id": 985449,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 18349, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.23)", "pid": 2338710, "tid": 2338710, + "ts": 6345940055595.344, "dur": 29728.206, + "args": { + "External id": 985450,"Record function id": 0, "Ev Idx": 18350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940055714.319, "dur": 7.478, + "args": { + "External id": 985451,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 18351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940055725.683, "dur": 1.542, + "args": { + "External id": 985452,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940055729.363, "dur": 4.087, + "args": { + "External id": 985453,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940055735.487, "dur": 1.218, + "args": { + "External id": 985454,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940055738.459, "dur": 0.940, + "args": { + "External id": 985455,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940055740.903, "dur": 1.038, + "args": { + "External id": 985456,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940055743.627, "dur": 0.889, + "args": { + "External id": 985457,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940055746.641, "dur": 2.466, + "args": { + "External id": 985458,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940055750.819, "dur": 1.183, + "args": { + "External id": 985459,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940055756.063, "dur": 0.947, + "args": { + "External id": 985460,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345940055778.332, "dur": 29486.102, + "args": { + "External id": 985461,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345940055796.711, "dur": 29457.380, + "args": { + "External id": 985462,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940055815.456, "dur": 19.472, + "args": { + "External id": 985463,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345940055839.390, "dur": 29369.345, + "args": { + "External id": 985464,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 18364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345940055842.571, "dur": 29365.162, + "args": { + "External id": 985465,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 18365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940055848.605, "dur": 6.522, + "args": { + "External id": 985466,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940055857.229, "dur": 29345.793, + "args": { + "External id": 985467,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 18367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345940085490.177, "dur": 41.657, + "args": { + "External id": 985468,"Sequence number": 10552518, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 18368 + } + }, + { + "ph": "s", "id": 401, "pid": 2338710, "tid": 2338710, "ts": 6345940085490.177, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345940085514.197, "dur": 12.395, + "args": { + "External id": 985469,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 18369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940085519.853, "dur": 6.513, + "args": { + "External id": 985470,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345940085609.634, "dur": 88.558, + "args": { + "External id": 985471,"Record function id": 0, "Ev Idx": 18371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345940085699.688, "dur": 1343.638, + "args": { + "External id": 985472,"Record function id": 0, "Ev Idx": 18372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345940085743.354, "dur": 1281.168, + "args": { + "External id": 985473,"Sequence number": 10552519, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 18373 + } + }, + { + "ph": "s", "id": 400, "pid": 2338710, "tid": 2338710, "ts": 6345940085743.354, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345940085826.642, "dur": 58.913, + "args": { + "External id": 985474,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940085901.136, "dur": 135.635, + "args": { + "External id": 985475,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940086097.778, "dur": 59.627, + "args": { + "External id": 985476,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940086171.699, "dur": 38.471, + "args": { + "External id": 985477,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345940086247.687, "dur": 36.712, + "args": { + "External id": 985478,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345940086308.072, "dur": 20.734, + "args": { + "External id": 985479,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345940086356.881, "dur": 157.802, + "args": { + "External id": 985480,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940086417.109, "dur": 14.071, + "args": { + "External id": 985481,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940086424.357, "dur": 5.742, + "args": { + "External id": 985482,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940086434.191, "dur": 5.071, + "args": { + "External id": 985483,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940086440.737, "dur": 1.365, + "args": { + "External id": 985484,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940086445.395, "dur": 6.469, + "args": { + "External id": 985485,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940086526.698, "dur": 60.347, + "args": { + "External id": 985486,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345940086623.502, "dur": 37.991, + "args": { + "External id": 985487,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940086672.303, "dur": 49.839, + "args": { + "External id": 985488,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940086732.963, "dur": 42.469, + "args": { + "External id": 985489,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345940086801.163, "dur": 31.847, + "args": { + "External id": 985490,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 18390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940086840.813, "dur": 45.485, + "args": { + "External id": 985491,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 18391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345940086907.818, "dur": 24.480, + "args": { + "External id": 985492,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 18392 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.23)", "pid": 2338710, "tid": 2338710, + "ts": 6345940087161.594, "dur": 95.753, + "args": { + "External id": 985493,"Record function id": 0, "Ev Idx": 18393 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345940087348.650, "dur": 55.961, + "args": { + "External id": 985494,"Record function id": 0, "Ev Idx": 18394 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.24)", "pid": 2338710, "tid": 2338710, + "ts": 6345940087414.901, "dur": 31727.517, + "args": { + "External id": 985495,"Record function id": 0, "Ev Idx": 18395 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.24)", "pid": 2338710, "tid": 2338710, + "ts": 6345940087424.673, "dur": 1096.114, + "args": { + "External id": 985496,"Record function id": 0, "Ev Idx": 18396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940087521.439, "dur": 11.822, + "args": { + "External id": 985497,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345940087549.863, "dur": 41.583, + "args": { + "External id": 985498,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940087556.100, "dur": 2.410, + "args": { + "External id": 985499,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940087562.922, "dur": 0.565, + "args": { + "External id": 985500,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940087564.946, "dur": 0.845, + "args": { + "External id": 985501,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940087567.167, "dur": 0.700, + "args": { + "External id": 985502,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940087570.886, "dur": 0.558, + "args": { + "External id": 985503,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940087573.338, "dur": 0.863, + "args": { + "External id": 985504,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940087575.767, "dur": 3.980, + "args": { + "External id": 985505,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940087581.835, "dur": 0.444, + "args": { + "External id": 985506,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940087583.762, "dur": 0.529, + "args": { + "External id": 985507,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940087604.997, "dur": 65.964, + "args": { + "External id": 985508,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345940087710.523, "dur": 136.164, + "args": { + "External id": 985509,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 18409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940087723.507, "dur": 4.951, + "args": { + "External id": 985510,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345940087734.294, "dur": 12.983, + "args": { + "External id": 985511,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940087739.958, "dur": 6.805, + "args": { + "External id": 985512,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 18412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940087744.698, "dur": 0.695, + "args": { + "External id": 985513,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345940087755.383, "dur": 31.075, + "args": { + "External id": 985514,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940087757.935, "dur": 1.593, + "args": { + "External id": 985515,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940087761.053, "dur": 0.697, + "args": { + "External id": 985516,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940087763.280, "dur": 0.510, + "args": { + "External id": 985517,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940087767.024, "dur": 2.885, + "args": { + "External id": 985518,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940087771.228, "dur": 0.534, + "args": { + "External id": 985519,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940087773.228, "dur": 0.665, + "args": { + "External id": 985520,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940087776.321, "dur": 0.354, + "args": { + "External id": 985521,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940087778.048, "dur": 0.416, + "args": { + "External id": 985522,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940087779.983, "dur": 1.981, + "args": { + "External id": 985523,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940087798.847, "dur": 38.791, + "args": { + "External id": 985524,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345940087911.685, "dur": 487.580, + "args": { + "External id": 985525,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 18425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345940087946.421, "dur": 446.030, + "args": { + "External id": 985526,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 18426, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345940087957.947, "dur": 426.696, + "args": { + "External id": 985527,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 18427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345940088430.287, "dur": 2.636, + "args": { + "External id": 985528,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 18428, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.24)", "pid": 2338710, "tid": 2338710, + "ts": 6345940088544.722, "dur": 30275.302, + "args": { + "External id": 985529,"Record function id": 0, "Ev Idx": 18429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940088664.297, "dur": 8.053, + "args": { + "External id": 985530,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 18430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940088676.045, "dur": 1.013, + "args": { + "External id": 985531,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940088678.954, "dur": 3.545, + "args": { + "External id": 985532,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940088684.244, "dur": 0.945, + "args": { + "External id": 985533,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940088686.714, "dur": 1.033, + "args": { + "External id": 985534,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940088689.324, "dur": 0.840, + "args": { + "External id": 985535,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940088694.500, "dur": 1.058, + "args": { + "External id": 985536,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940088697.368, "dur": 2.605, + "args": { + "External id": 985537,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940088701.562, "dur": 0.625, + "args": { + "External id": 985538,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940088704.220, "dur": 1.097, + "args": { + "External id": 985539,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345940088735.018, "dur": 30024.266, + "args": { + "External id": 985540,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345940088753.525, "dur": 29994.706, + "args": { + "External id": 985541,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940088770.084, "dur": 20.737, + "args": { + "External id": 985542,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345940088794.910, "dur": 29908.676, + "args": { + "External id": 985543,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 18443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345940088797.856, "dur": 29904.890, + "args": { + "External id": 985544,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 18444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940088804.139, "dur": 6.060, + "args": { + "External id": 985545,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940088812.149, "dur": 29885.067, + "args": { + "External id": 985546,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 18446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345940119021.660, "dur": 81.415, + "args": { + "External id": 985547,"Sequence number": 10552520, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 18447 + } + }, + { + "ph": "s", "id": 399, "pid": 2338710, "tid": 2338710, "ts": 6345940119021.660, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345940119081.708, "dur": 15.287, + "args": { + "External id": 985548,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 18448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940119087.884, "dur": 7.447, + "args": { + "External id": 985549,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345940119195.575, "dur": 91.656, + "args": { + "External id": 985550,"Record function id": 0, "Ev Idx": 18450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345940119289.512, "dur": 1390.040, + "args": { + "External id": 985551,"Record function id": 0, "Ev Idx": 18451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345940119339.735, "dur": 1321.395, + "args": { + "External id": 985552,"Sequence number": 10552521, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 18452 + } + }, + { + "ph": "s", "id": 398, "pid": 2338710, "tid": 2338710, "ts": 6345940119339.735, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345940119434.480, "dur": 66.284, + "args": { + "External id": 985553,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940119518.491, "dur": 124.295, + "args": { + "External id": 985554,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940119659.145, "dur": 49.282, + "args": { + "External id": 985555,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940119719.405, "dur": 36.735, + "args": { + "External id": 985556,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345940119789.746, "dur": 34.564, + "args": { + "External id": 985557,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345940119848.056, "dur": 22.526, + "args": { + "External id": 985558,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345940119898.502, "dur": 224.404, + "args": { + "External id": 985559,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940119957.405, "dur": 15.353, + "args": { + "External id": 985560,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940119965.194, "dur": 6.585, + "args": { + "External id": 985561,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940119976.839, "dur": 4.761, + "args": { + "External id": 985562,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940119982.843, "dur": 1.408, + "args": { + "External id": 985563,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940119987.483, "dur": 5.957, + "args": { + "External id": 985564,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940120140.994, "dur": 74.273, + "args": { + "External id": 985565,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345940120256.765, "dur": 38.302, + "args": { + "External id": 985566,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940120307.239, "dur": 54.842, + "args": { + "External id": 985567,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940120375.006, "dur": 42.850, + "args": { + "External id": 985568,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345940120451.628, "dur": 35.672, + "args": { + "External id": 985569,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 18469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940120494.665, "dur": 42.916, + "args": { + "External id": 985570,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 18470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345940120560.135, "dur": 22.820, + "args": { + "External id": 985571,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 18471 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.24)", "pid": 2338710, "tid": 2338710, + "ts": 6345940120759.728, "dur": 92.061, + "args": { + "External id": 985572,"Record function id": 0, "Ev Idx": 18472 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338710, "tid": 2338710, + "ts": 6345940120943.216, "dur": 55.455, + "args": { + "External id": 985573,"Record function id": 0, "Ev Idx": 18473 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.25)", "pid": 2338710, "tid": 2338710, + "ts": 6345940121031.147, "dur": 29995.165, + "args": { + "External id": 985574,"Record function id": 0, "Ev Idx": 18474 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.25)", "pid": 2338710, "tid": 2338710, + "ts": 6345940121042.928, "dur": 1200.938, + "args": { + "External id": 985575,"Record function id": 0, "Ev Idx": 18475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940121194.286, "dur": 16.239, + "args": { + "External id": 985576,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345940121228.381, "dur": 42.345, + "args": { + "External id": 985577,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940121234.910, "dur": 2.844, + "args": { + "External id": 985578,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940121242.469, "dur": 0.541, + "args": { + "External id": 985579,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940121244.383, "dur": 0.628, + "args": { + "External id": 985580,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940121246.881, "dur": 0.598, + "args": { + "External id": 985581,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940121250.079, "dur": 0.764, + "args": { + "External id": 985582,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940121253.381, "dur": 0.688, + "args": { + "External id": 985583,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940121255.488, "dur": 4.471, + "args": { + "External id": 985584,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940121261.708, "dur": 0.301, + "args": { + "External id": 985585,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940121263.245, "dur": 0.448, + "args": { + "External id": 985586,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940121284.307, "dur": 68.860, + "args": { + "External id": 985587,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338710, "tid": 2338710, + "ts": 6345940121395.787, "dur": 158.486, + "args": { + "External id": 985588,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "4", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 18488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940121410.307, "dur": 4.442, + "args": { + "External id": 985589,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338710, "tid": 2338710, + "ts": 6345940121420.536, "dur": 12.168, + "args": { + "External id": 985590,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940121425.791, "dur": 6.390, + "args": { + "External id": 985591,"Record function id": 0, "Concrete Inputs": ["", "0", "109056000", "136320000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 18491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940121430.051, "dur": 0.479, + "args": { + "External id": 985592,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338710, "tid": 2338710, + "ts": 6345940121440.947, "dur": 51.262, + "args": { + "External id": 985593,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940121443.138, "dur": 1.890, + "args": { + "External id": 985594,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "109056000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940121446.828, "dur": 0.493, + "args": { + "External id": 985595,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "109056512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940121448.872, "dur": 0.581, + "args": { + "External id": 985596,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111153664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940121452.835, "dur": 2.330, + "args": { + "External id": 985597,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "111677952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940121456.424, "dur": 0.699, + "args": { + "External id": 985598,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "112202240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940121458.910, "dur": 0.538, + "args": { + "External id": 985599,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "114299392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940121465.336, "dur": 0.321, + "args": { + "External id": 985600,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "114299904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940121483.816, "dur": 0.511, + "args": { + "External id": 985601,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "121639936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940121485.519, "dur": 1.893, + "args": { + "External id": 985602,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "128979968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940121505.988, "dur": 38.697, + "args": { + "External id": 985603,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338710, "tid": 2338710, + "ts": 6345940121618.686, "dur": 498.227, + "args": { + "External id": 985604,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 18504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345940121655.587, "dur": 454.755, + "args": { + "External id": 985605,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 4, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 18505, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338710, "tid": 2338710, + "ts": 6345940121667.805, "dur": 430.254, + "args": { + "External id": 985606,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 18506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345940122150.607, "dur": 3.726, + "args": { + "External id": 985607,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 18507, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.25)", "pid": 2338710, "tid": 2338710, + "ts": 6345940122268.551, "dur": 28505.772, + "args": { + "External id": 985608,"Record function id": 0, "Ev Idx": 18508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940122393.804, "dur": 7.618, + "args": { + "External id": 985609,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 18509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940122405.538, "dur": 1.334, + "args": { + "External id": 985610,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940122408.959, "dur": 3.708, + "args": { + "External id": 985611,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940122414.446, "dur": 1.026, + "args": { + "External id": 985612,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940122417.166, "dur": 0.681, + "args": { + "External id": 985613,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940122419.268, "dur": 0.974, + "args": { + "External id": 985614,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940122422.062, "dur": 1.080, + "args": { + "External id": 985615,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940122425.093, "dur": 2.435, + "args": { + "External id": 985616,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940122429.098, "dur": 0.927, + "args": { + "External id": 985617,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940122434.083, "dur": 0.989, + "args": { + "External id": 985618,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345940122457.297, "dur": 28263.772, + "args": { + "External id": 985619,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345940122475.394, "dur": 28235.666, + "args": { + "External id": 985620,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940122519.028, "dur": 19.818, + "args": { + "External id": 985621,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345940122543.974, "dur": 28121.859, + "args": { + "External id": 985622,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 18522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345940122547.156, "dur": 28117.022, + "args": { + "External id": 985623,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 18523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940122553.257, "dur": 7.231, + "args": { + "External id": 985624,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940122562.507, "dur": 28097.650, + "args": { + "External id": 985625,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 18525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345940150945.754, "dur": 37.765, + "args": { + "External id": 985626,"Sequence number": 10552522, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 18526 + } + }, + { + "ph": "s", "id": 397, "pid": 2338710, "tid": 2338710, "ts": 6345940150945.754, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345940150966.423, "dur": 11.945, + "args": { + "External id": 985627,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 18527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940150971.959, "dur": 6.202, + "args": { + "External id": 985628,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345940151108.084, "dur": 94.360, + "args": { + "External id": 985629,"Record function id": 0, "Ev Idx": 18529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338710, "tid": 2338710, + "ts": 6345940151204.345, "dur": 1328.310, + "args": { + "External id": 985630,"Record function id": 0, "Ev Idx": 18530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345940151250.267, "dur": 1266.285, + "args": { + "External id": 985631,"Sequence number": 10552523, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 18531 + } + }, + { + "ph": "s", "id": 396, "pid": 2338710, "tid": 2338710, "ts": 6345940151250.267, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345940151336.205, "dur": 62.157, + "args": { + "External id": 985632,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940151415.733, "dur": 121.957, + "args": { + "External id": 985633,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940151552.941, "dur": 43.628, + "args": { + "External id": 985634,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940151606.433, "dur": 33.536, + "args": { + "External id": 985635,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338710, "tid": 2338710, + "ts": 6345940151669.071, "dur": 31.480, + "args": { + "External id": 985636,"kernel_hash": "cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/d5/cd5ftgdvg3zpepgpcumeff4mou3c2fy27oihfnqypjiirszq7kos.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338710, "tid": 2338710, + "ts": 6345940151727.972, "dur": 20.740, + "args": { + "External id": 985637,"kernel_hash": "cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/yr/cyroqezuoeuamd3jqkugypuyres5u7ufws5x3bgos5atsczyhyk3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345940151774.930, "dur": 158.432, + "args": { + "External id": 985638,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940151835.164, "dur": 15.906, + "args": { + "External id": 985639,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940151842.430, "dur": 7.574, + "args": { + "External id": 985640,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940151855.321, "dur": 4.849, + "args": { + "External id": 985641,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940151861.726, "dur": 1.242, + "args": { + "External id": 985642,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940151866.598, "dur": 5.921, + "args": { + "External id": 985643,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940151944.980, "dur": 58.492, + "args": { + "External id": 985644,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338710, "tid": 2338710, + "ts": 6345940152107.324, "dur": 39.786, + "args": { + "External id": 985645,"kernel_hash": "c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/5x/c5xfababnpqt3wuhdecdmhwvhcvmdznprunosm6exvj2vpztzphf.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940152162.268, "dur": 58.112, + "args": { + "External id": 985646,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940152232.851, "dur": 42.891, + "args": { + "External id": 985647,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338710, "tid": 2338710, + "ts": 6345940152302.875, "dur": 34.999, + "args": { + "External id": 985648,"kernel_hash": "cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/uy/cuyinmnu22ra2lhbf5b3kn2pvmpzmlkeucl5ymtesvmjkft4vxxm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 18548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940152346.959, "dur": 46.231, + "args": { + "External id": 985649,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 18549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338710, "tid": 2338710, + "ts": 6345940152417.241, "dur": 22.506, + "args": { + "External id": 985650,"kernel_hash": "cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qo/cqo6qtnieavgrnue4llkfxac3bydsk5p7r22potmkpxvtg3jxrsu.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 18550 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.25)", "pid": 2338710, "tid": 2338710, + "ts": 6345940152609.125, "dur": 42.634, + "args": { + "External id": 985651,"Record function id": 0, "Ev Idx": 18551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345940152811.726, "dur": 399.242, + "args": { + "External id": 985652,"Sequence number": 10552524, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [], [], [], [], []], "Ev Idx": 18552 + } + }, + { + "ph": "s", "id": 395, "pid": 2338710, "tid": 2338710, "ts": 6345940152811.726, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940152849.302, "dur": 9.030, + "args": { + "External id": 985653,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940152851.813, "dur": 6.182, + "args": { + "External id": 985654,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940152869.873, "dur": 14.448, + "args": { + "External id": 985655,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 18555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940152874.986, "dur": 8.553, + "args": { + "External id": 985656,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940152895.339, "dur": 5.925, + "args": { + "External id": 985657,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940153184.209, "dur": 9.212, + "args": { + "External id": 985658,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940153187.570, "dur": 5.272, + "args": { + "External id": 985659,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940153245.394, "dur": 160.437, + "args": { + "External id": 985660,"Sequence number": 10552525, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 18560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940153248.335, "dur": 17.566, + "args": { + "External id": 985661,"Sequence number": 10552525, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 18561 + } + }, + { + "ph": "s", "id": 394, "pid": 2338710, "tid": 2338710, "ts": 6345940153248.335, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940153254.341, "dur": 9.634, + "args": { + "External id": 985662,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 18562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940153260.846, "dur": 2.730, + "args": { + "External id": 985663,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 18563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940153268.295, "dur": 137.168, + "args": { + "External id": 985664,"Sequence number": 10552526, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 18564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940153271.696, "dur": 6.019, + "args": { + "External id": 985665,"Sequence number": 10552526, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940153272.631, "dur": 4.923, + "args": { + "External id": 985666,"Sequence number": 10552526, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18566 + } + }, + { + "ph": "s", "id": 393, "pid": 2338710, "tid": 2338710, "ts": 6345940153272.631, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940153282.042, "dur": 110.675, + "args": { + "External id": 985667,"Sequence number": 10552527, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 18567 + } + }, + { + "ph": "s", "id": 392, "pid": 2338710, "tid": 2338710, "ts": 6345940153282.042, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345940153396.904, "dur": 7.251, + "args": { + "External id": 985668,"Sequence number": 10552528, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18568 + } + }, + { + "ph": "s", "id": 391, "pid": 2338710, "tid": 2338710, "ts": 6345940153396.904, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940153417.250, "dur": 78.362, + "args": { + "External id": 985669,"Sequence number": 10552529, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 18569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940153418.182, "dur": 9.774, + "args": { + "External id": 985670,"Sequence number": 10552529, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 18570 + } + }, + { + "ph": "s", "id": 390, "pid": 2338710, "tid": 2338710, "ts": 6345940153418.182, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940153420.725, "dur": 5.961, + "args": { + "External id": 985671,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 18571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940153425.501, "dur": 0.999, + "args": { + "External id": 985672,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 18572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940153428.853, "dur": 66.433, + "args": { + "External id": 985673,"Sequence number": 10552530, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 18573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940153430.722, "dur": 5.192, + "args": { + "External id": 985674,"Sequence number": 10552530, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940153431.693, "dur": 4.004, + "args": { + "External id": 985675,"Sequence number": 10552530, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18575 + } + }, + { + "ph": "s", "id": 389, "pid": 2338710, "tid": 2338710, "ts": 6345940153431.693, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940153436.708, "dur": 51.816, + "args": { + "External id": 985676,"Sequence number": 10552531, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 18576 + } + }, + { + "ph": "s", "id": 388, "pid": 2338710, "tid": 2338710, "ts": 6345940153436.708, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345940153490.346, "dur": 4.482, + "args": { + "External id": 985677,"Sequence number": 10552532, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 18577 + } + }, + { + "ph": "s", "id": 387, "pid": 2338710, "tid": 2338710, "ts": 6345940153490.346, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940153504.026, "dur": 73.043, + "args": { + "External id": 985678,"Sequence number": 10552533, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 18578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940153504.858, "dur": 6.422, + "args": { + "External id": 985679,"Sequence number": 10552533, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 18579 + } + }, + { + "ph": "s", "id": 386, "pid": 2338710, "tid": 2338710, "ts": 6345940153504.858, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940153507.215, "dur": 2.787, + "args": { + "External id": 985680,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 18580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940153509.260, "dur": 0.553, + "args": { + "External id": 985681,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 18581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940153514.254, "dur": 62.515, + "args": { + "External id": 985682,"Sequence number": 10552534, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 18582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940153515.216, "dur": 5.957, + "args": { + "External id": 985683,"Sequence number": 10552534, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940153516.443, "dur": 4.563, + "args": { + "External id": 985684,"Sequence number": 10552534, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18584 + } + }, + { + "ph": "s", "id": 385, "pid": 2338710, "tid": 2338710, "ts": 6345940153516.443, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940153521.855, "dur": 45.102, + "args": { + "External id": 985685,"Sequence number": 10552535, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 18585 + } + }, + { + "ph": "s", "id": 384, "pid": 2338710, "tid": 2338710, "ts": 6345940153521.855, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345940153569.394, "dur": 6.946, + "args": { + "External id": 985686,"Sequence number": 10552536, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 18586 + } + }, + { + "ph": "s", "id": 383, "pid": 2338710, "tid": 2338710, "ts": 6345940153569.394, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940153604.089, "dur": 4.387, + "args": { + "External id": 985687,"Sequence number": 10552537, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940153605.329, "dur": 2.994, + "args": { + "External id": 985688,"Sequence number": 10552537, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18588 + } + }, + { + "ph": "s", "id": 382, "pid": 2338710, "tid": 2338710, "ts": 6345940153605.329, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940153617.541, "dur": 6.174, + "args": { + "External id": 985689,"Sequence number": 10552538, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940153618.690, "dur": 4.851, + "args": { + "External id": 985690,"Sequence number": 10552538, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18590 + } + }, + { + "ph": "s", "id": 381, "pid": 2338710, "tid": 2338710, "ts": 6345940153618.690, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940153630.869, "dur": 4.520, + "args": { + "External id": 985691,"Sequence number": 10552539, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940153631.985, "dur": 3.242, + "args": { + "External id": 985692,"Sequence number": 10552539, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18592 + } + }, + { + "ph": "s", "id": 380, "pid": 2338710, "tid": 2338710, "ts": 6345940153631.985, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345940153678.316, "dur": 209.016, + "args": { + "External id": 985693,"Sequence number": 10552540, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 18593 + } + }, + { + "ph": "s", "id": 379, "pid": 2338710, "tid": 2338710, "ts": 6345940153678.316, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940153704.058, "dur": 10.934, + "args": { + "External id": 985694,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940153707.679, "dur": 6.750, + "args": { + "External id": 985695,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345940153902.293, "dur": 198.553, + "args": { + "External id": 985696,"Sequence number": 10552541, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 18596 + } + }, + { + "ph": "s", "id": 378, "pid": 2338710, "tid": 2338710, "ts": 6345940153902.293, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940153918.988, "dur": 9.515, + "args": { + "External id": 985697,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 18597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940153922.506, "dur": 5.522, + "args": { + "External id": 985698,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 2338710, "tid": 2338710, + "ts": 6345940154139.539, "dur": 230.301, + "args": { + "External id": 985699,"Sequence number": 10552542, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], []], "Ev Idx": 18599 + } + }, + { + "ph": "s", "id": 377, "pid": 2338710, "tid": 2338710, "ts": 6345940154139.539, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345940154178.195, "dur": 158.251, + "args": { + "External id": 985700,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940154242.459, "dur": 10.775, + "args": { + "External id": 985701,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940154246.277, "dur": 6.400, + "args": { + "External id": 985702,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940154256.101, "dur": 4.831, + "args": { + "External id": 985703,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940154262.796, "dur": 1.295, + "args": { + "External id": 985704,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940154268.598, "dur": 4.866, + "args": { + "External id": 985705,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338710, "tid": 2338710, + "ts": 6345940154352.852, "dur": 6.373, + "args": { + "External id": 985706,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 18606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940154376.801, "dur": 7.578, + "args": { + "External id": 985707,"Sequence number": 10552543, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 18607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940154378.640, "dur": 5.551, + "args": { + "External id": 985708,"Sequence number": 10552543, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 18608 + } + }, + { + "ph": "s", "id": 376, "pid": 2338710, "tid": 2338710, "ts": 6345940154378.640, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940154399.305, "dur": 130.210, + "args": { + "External id": 985709,"Sequence number": 10552544, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 18609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940154402.925, "dur": 10.927, + "args": { + "External id": 985710,"Sequence number": 10552544, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 18610 + } + }, + { + "ph": "s", "id": 375, "pid": 2338710, "tid": 2338710, "ts": 6345940154402.925, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940154406.778, "dur": 5.742, + "args": { + "External id": 985711,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 18611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940154409.832, "dur": 2.316, + "args": { + "External id": 985712,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 18612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940154415.585, "dur": 113.438, + "args": { + "External id": 985713,"Sequence number": 10552545, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 18613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940154417.809, "dur": 5.588, + "args": { + "External id": 985714,"Sequence number": 10552545, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940154421.008, "dur": 2.236, + "args": { + "External id": 985715,"Sequence number": 10552545, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18615 + } + }, + { + "ph": "s", "id": 374, "pid": 2338710, "tid": 2338710, "ts": 6345940154421.008, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940154424.492, "dur": 96.887, + "args": { + "External id": 985716,"Sequence number": 10552546, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 18616 + } + }, + { + "ph": "s", "id": 373, "pid": 2338710, "tid": 2338710, "ts": 6345940154424.492, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345940154524.574, "dur": 3.489, + "args": { + "External id": 985717,"Sequence number": 10552547, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18617 + } + }, + { + "ph": "s", "id": 372, "pid": 2338710, "tid": 2338710, "ts": 6345940154524.574, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345940154572.177, "dur": 274.462, + "args": { + "External id": 985718,"Sequence number": 10552548, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [8, 4096, 4096], [], [], [], []], "Ev Idx": 18618 + } + }, + { + "ph": "s", "id": 371, "pid": 2338710, "tid": 2338710, "ts": 6345940154572.177, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940154595.813, "dur": 3.541, + "args": { + "External id": 985719,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940154596.790, "dur": 2.280, + "args": { + "External id": 985720,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 2338710, "tid": 2338710, + "ts": 6345940154609.429, "dur": 6.868, + "args": { + "External id": 985721,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [4096, 1]], "Input Dims": [[8, 4096, 4096], [32768, 4096]], "Ev Idx": 18621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940154610.902, "dur": 5.262, + "args": { + "External id": 985722,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940154612.077, "dur": 3.972, + "args": { + "External id": 985723,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940154626.085, "dur": 10.574, + "args": { + "External id": 985724,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 18624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940154631.211, "dur": 5.096, + "args": { + "External id": 985725,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940154644.180, "dur": 3.502, + "args": { + "External id": 985726,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940154651.500, "dur": 4.905, + "args": { + "External id": 985727,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940154821.132, "dur": 4.077, + "args": { + "External id": 985728,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940154822.550, "dur": 2.377, + "args": { + "External id": 985729,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940154828.258, "dur": 5.145, + "args": { + "External id": 985730,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940154832.194, "dur": 1.092, + "args": { + "External id": 985731,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940154870.410, "dur": 124.486, + "args": { + "External id": 985732,"Sequence number": 10552549, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 18632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940154871.520, "dur": 10.495, + "args": { + "External id": 985733,"Sequence number": 10552549, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 18633 + } + }, + { + "ph": "s", "id": 370, "pid": 2338710, "tid": 2338710, "ts": 6345940154871.520, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940154874.581, "dur": 6.218, + "args": { + "External id": 985734,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 18634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940154877.200, "dur": 3.254, + "args": { + "External id": 985735,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 18635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940154883.270, "dur": 111.285, + "args": { + "External id": 985736,"Sequence number": 10552550, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 18636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940154888.133, "dur": 3.929, + "args": { + "External id": 985737,"Sequence number": 10552550, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940154889.135, "dur": 2.765, + "args": { + "External id": 985738,"Sequence number": 10552550, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18638 + } + }, + { + "ph": "s", "id": 369, "pid": 2338710, "tid": 2338710, "ts": 6345940154889.135, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940154893.220, "dur": 90.233, + "args": { + "External id": 985739,"Sequence number": 10552551, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 18639 + } + }, + { + "ph": "s", "id": 368, "pid": 2338710, "tid": 2338710, "ts": 6345940154893.220, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345940154986.684, "dur": 7.045, + "args": { + "External id": 985740,"Sequence number": 10552552, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 18640 + } + }, + { + "ph": "s", "id": 367, "pid": 2338710, "tid": 2338710, "ts": 6345940154986.684, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940155006.540, "dur": 165.900, + "args": { + "External id": 985741,"Sequence number": 10552553, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 18641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940155031.039, "dur": 10.329, + "args": { + "External id": 985742,"Sequence number": 10552553, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 18642 + } + }, + { + "ph": "s", "id": 366, "pid": 2338710, "tid": 2338710, "ts": 6345940155031.039, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940155034.861, "dur": 4.847, + "args": { + "External id": 985743,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 18643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940155037.630, "dur": 1.673, + "args": { + "External id": 985744,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 18644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940155042.287, "dur": 129.808, + "args": { + "External id": 985745,"Sequence number": 10552554, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 18645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940155045.734, "dur": 44.106, + "args": { + "External id": 985746,"Sequence number": 10552554, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940155046.964, "dur": 42.183, + "args": { + "External id": 985747,"Sequence number": 10552554, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18647 + } + }, + { + "ph": "s", "id": 365, "pid": 2338710, "tid": 2338710, "ts": 6345940155046.964, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940155091.637, "dur": 73.647, + "args": { + "External id": 985748,"Sequence number": 10552555, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 18648 + } + }, + { + "ph": "s", "id": 364, "pid": 2338710, "tid": 2338710, "ts": 6345940155091.637, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345940155168.253, "dur": 3.075, + "args": { + "External id": 985749,"Sequence number": 10552556, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 18649 + } + }, + { + "ph": "s", "id": 363, "pid": 2338710, "tid": 2338710, "ts": 6345940155168.253, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345940155207.535, "dur": 201.687, + "args": { + "External id": 985750,"Sequence number": 10552557, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [8, 4096, 14336], [4096, 14336], []], "Ev Idx": 18650 + } + }, + { + "ph": "s", "id": 362, "pid": 2338710, "tid": 2338710, "ts": 6345940155207.535, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940155264.264, "dur": 5.427, + "args": { + "External id": 985751,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940155311.412, "dur": 81.572, + "args": { + "External id": 985752,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [4096, 14336], []], "Ev Idx": 18652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940155312.336, "dur": 8.454, + "args": { + "External id": 985753,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 18653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940155314.161, "dur": 5.590, + "args": { + "External id": 985754,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 18654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940155317.017, "dur": 2.432, + "args": { + "External id": 985755,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 18655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940155321.767, "dur": 70.747, + "args": { + "External id": 985756,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[58720256, 14336, 1], [1, 14336]], "Input Dims": [[8, 4096, 14336], [14336, 4096]], "Ev Idx": 18656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940155325.941, "dur": 3.015, + "args": { + "External id": 985757,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 18657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940155327.136, "dur": 1.672, + "args": { + "External id": 985758,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 18658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940155329.783, "dur": 58.164, + "args": { + "External id": 985759,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 18659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345940155390.376, "dur": 1.459, + "args": { + "External id": 985760,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338710, "tid": 2338710, + "ts": 6345940155422.724, "dur": 31.572, + "args": { + "External id": 985761,"Sequence number": 10552558, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 18661 + } + }, + { + "ph": "s", "id": 361, "pid": 2338710, "tid": 2338710, "ts": 6345940155422.724, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345940155497.734, "dur": 222.428, + "args": { + "External id": 985762,"Sequence number": 10552559, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [], [], [], [], []], "Ev Idx": 18662 + } + }, + { + "ph": "s", "id": 360, "pid": 2338710, "tid": 2338710, "ts": 6345940155497.734, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940155521.630, "dur": 5.043, + "args": { + "External id": 985763,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940155523.231, "dur": 3.204, + "args": { + "External id": 985764,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940155536.752, "dur": 8.463, + "args": { + "External id": 985765,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 18665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940155540.350, "dur": 4.503, + "args": { + "External id": 985766,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940155552.614, "dur": 3.684, + "args": { + "External id": 985767,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940155705.836, "dur": 3.325, + "args": { + "External id": 985768,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940155706.980, "dur": 1.905, + "args": { + "External id": 985769,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940155740.592, "dur": 102.064, + "args": { + "External id": 985770,"Sequence number": 10552560, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 18670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940155744.092, "dur": 7.750, + "args": { + "External id": 985771,"Sequence number": 10552560, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 18671 + } + }, + { + "ph": "s", "id": 359, "pid": 2338710, "tid": 2338710, "ts": 6345940155744.092, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940155746.335, "dur": 3.920, + "args": { + "External id": 985772,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 18672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940155748.507, "dur": 1.462, + "args": { + "External id": 985773,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 18673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940155752.886, "dur": 89.399, + "args": { + "External id": 985774,"Sequence number": 10552561, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 18674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940155754.320, "dur": 10.856, + "args": { + "External id": 985775,"Sequence number": 10552561, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940155758.049, "dur": 6.905, + "args": { + "External id": 985776,"Sequence number": 10552561, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18676 + } + }, + { + "ph": "s", "id": 358, "pid": 2338710, "tid": 2338710, "ts": 6345940155758.049, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940155766.170, "dur": 70.252, + "args": { + "External id": 985777,"Sequence number": 10552562, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 18677 + } + }, + { + "ph": "s", "id": 357, "pid": 2338710, "tid": 2338710, "ts": 6345940155766.170, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345940155839.063, "dur": 2.620, + "args": { + "External id": 985778,"Sequence number": 10552563, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18678 + } + }, + { + "ph": "s", "id": 356, "pid": 2338710, "tid": 2338710, "ts": 6345940155839.063, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940155851.589, "dur": 74.662, + "args": { + "External id": 985779,"Sequence number": 10552564, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 18679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940155852.116, "dur": 10.805, + "args": { + "External id": 985780,"Sequence number": 10552564, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 18680 + } + }, + { + "ph": "s", "id": 355, "pid": 2338710, "tid": 2338710, "ts": 6345940155852.116, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940155857.581, "dur": 4.115, + "args": { + "External id": 985781,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 18681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940155860.768, "dur": 0.757, + "args": { + "External id": 985782,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 18682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940155863.931, "dur": 61.959, + "args": { + "External id": 985783,"Sequence number": 10552565, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 18683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940155865.146, "dur": 6.565, + "args": { + "External id": 985784,"Sequence number": 10552565, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940155868.561, "dur": 3.004, + "args": { + "External id": 985785,"Sequence number": 10552565, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18685 + } + }, + { + "ph": "s", "id": 354, "pid": 2338710, "tid": 2338710, "ts": 6345940155868.561, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940155872.463, "dur": 45.751, + "args": { + "External id": 985786,"Sequence number": 10552566, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 18686 + } + }, + { + "ph": "s", "id": 353, "pid": 2338710, "tid": 2338710, "ts": 6345940155872.463, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345940155920.200, "dur": 4.963, + "args": { + "External id": 985787,"Sequence number": 10552567, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 18687 + } + }, + { + "ph": "s", "id": 352, "pid": 2338710, "tid": 2338710, "ts": 6345940155920.200, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940155935.142, "dur": 68.659, + "args": { + "External id": 985788,"Sequence number": 10552568, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 18688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940155935.938, "dur": 8.138, + "args": { + "External id": 985789,"Sequence number": 10552568, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 18689 + } + }, + { + "ph": "s", "id": 351, "pid": 2338710, "tid": 2338710, "ts": 6345940155935.938, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940155937.699, "dur": 5.149, + "args": { + "External id": 985790,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 18690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940155941.811, "dur": 0.878, + "args": { + "External id": 985791,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 18691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940155944.809, "dur": 58.576, + "args": { + "External id": 985792,"Sequence number": 10552569, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 18692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940155945.975, "dur": 7.848, + "args": { + "External id": 985793,"Sequence number": 10552569, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940155947.001, "dur": 6.679, + "args": { + "External id": 985794,"Sequence number": 10552569, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18694 + } + }, + { + "ph": "s", "id": 350, "pid": 2338710, "tid": 2338710, "ts": 6345940155947.001, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940155956.922, "dur": 41.140, + "args": { + "External id": 985795,"Sequence number": 10552570, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 18695 + } + }, + { + "ph": "s", "id": 349, "pid": 2338710, "tid": 2338710, "ts": 6345940155956.922, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345940156000.061, "dur": 2.846, + "args": { + "External id": 985796,"Sequence number": 10552571, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 18696 + } + }, + { + "ph": "s", "id": 348, "pid": 2338710, "tid": 2338710, "ts": 6345940156000.061, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940156045.548, "dur": 45.444, + "args": { + "External id": 985797,"Sequence number": 10552572, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940156047.081, "dur": 43.178, + "args": { + "External id": 985798,"Sequence number": 10552572, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18698 + } + }, + { + "ph": "s", "id": 347, "pid": 2338710, "tid": 2338710, "ts": 6345940156047.081, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940156104.266, "dur": 8.966, + "args": { + "External id": 985799,"Sequence number": 10552573, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940156108.158, "dur": 4.892, + "args": { + "External id": 985800,"Sequence number": 10552573, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18700 + } + }, + { + "ph": "s", "id": 346, "pid": 2338710, "tid": 2338710, "ts": 6345940156108.158, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940156118.123, "dur": 3.566, + "args": { + "External id": 985801,"Sequence number": 10552574, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940156119.255, "dur": 2.237, + "args": { + "External id": 985802,"Sequence number": 10552574, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18702 + } + }, + { + "ph": "s", "id": 345, "pid": 2338710, "tid": 2338710, "ts": 6345940156119.255, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345940156157.945, "dur": 192.675, + "args": { + "External id": 985803,"Sequence number": 10552575, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 18703 + } + }, + { + "ph": "s", "id": 344, "pid": 2338710, "tid": 2338710, "ts": 6345940156157.945, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940156180.972, "dur": 11.635, + "args": { + "External id": 985804,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940156184.424, "dur": 7.499, + "args": { + "External id": 985805,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345940156367.401, "dur": 128.663, + "args": { + "External id": 985806,"Sequence number": 10552576, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 18706 + } + }, + { + "ph": "s", "id": 343, "pid": 2338710, "tid": 2338710, "ts": 6345940156367.401, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940156384.099, "dur": 8.476, + "args": { + "External id": 985807,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 18707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940156386.669, "dur": 5.256, + "args": { + "External id": 985808,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 2338710, "tid": 2338710, + "ts": 6345940156528.445, "dur": 215.325, + "args": { + "External id": 985809,"Sequence number": 10552577, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], []], "Ev Idx": 18709 + } + }, + { + "ph": "s", "id": 342, "pid": 2338710, "tid": 2338710, "ts": 6345940156528.445, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345940156563.740, "dur": 152.263, + "args": { + "External id": 985810,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940156620.090, "dur": 8.127, + "args": { + "External id": 985811,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940156623.114, "dur": 4.623, + "args": { + "External id": 985812,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940156631.354, "dur": 4.345, + "args": { + "External id": 985813,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940156643.921, "dur": 1.272, + "args": { + "External id": 985814,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940156649.628, "dur": 4.155, + "args": { + "External id": 985815,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338710, "tid": 2338710, + "ts": 6345940156728.485, "dur": 5.037, + "args": { + "External id": 985816,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 18716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940156750.126, "dur": 8.027, + "args": { + "External id": 985817,"Sequence number": 10552578, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 18717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940156751.581, "dur": 6.392, + "args": { + "External id": 985818,"Sequence number": 10552578, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 18718 + } + }, + { + "ph": "s", "id": 341, "pid": 2338710, "tid": 2338710, "ts": 6345940156751.581, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940156770.921, "dur": 122.241, + "args": { + "External id": 985819,"Sequence number": 10552579, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 18719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940156771.859, "dur": 8.535, + "args": { + "External id": 985820,"Sequence number": 10552579, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 18720 + } + }, + { + "ph": "s", "id": 340, "pid": 2338710, "tid": 2338710, "ts": 6345940156771.859, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940156774.247, "dur": 4.982, + "args": { + "External id": 985821,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 18721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940156777.035, "dur": 1.875, + "args": { + "External id": 985822,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 18722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940156781.932, "dur": 110.815, + "args": { + "External id": 985823,"Sequence number": 10552580, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 18723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940156786.732, "dur": 4.099, + "args": { + "External id": 985824,"Sequence number": 10552580, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940156787.593, "dur": 3.071, + "args": { + "External id": 985825,"Sequence number": 10552580, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18725 + } + }, + { + "ph": "s", "id": 339, "pid": 2338710, "tid": 2338710, "ts": 6345940156787.593, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940156792.049, "dur": 90.564, + "args": { + "External id": 985826,"Sequence number": 10552581, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 18726 + } + }, + { + "ph": "s", "id": 338, "pid": 2338710, "tid": 2338710, "ts": 6345940156792.049, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345940156885.330, "dur": 6.582, + "args": { + "External id": 985827,"Sequence number": 10552582, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18727 + } + }, + { + "ph": "s", "id": 337, "pid": 2338710, "tid": 2338710, "ts": 6345940156885.330, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345940156933.352, "dur": 320.024, + "args": { + "External id": 985828,"Sequence number": 10552583, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [8, 4096, 4096], [], [], [], []], "Ev Idx": 18728 + } + }, + { + "ph": "s", "id": 336, "pid": 2338710, "tid": 2338710, "ts": 6345940156933.352, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940156953.873, "dur": 3.146, + "args": { + "External id": 985829,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940156954.904, "dur": 1.954, + "args": { + "External id": 985830,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 2338710, "tid": 2338710, + "ts": 6345940156962.570, "dur": 6.116, + "args": { + "External id": 985831,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [4096, 1]], "Input Dims": [[8, 4096, 4096], [32768, 4096]], "Ev Idx": 18731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940156963.697, "dur": 4.853, + "args": { + "External id": 985832,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940156967.226, "dur": 1.209, + "args": { + "External id": 985833,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940156976.965, "dur": 7.579, + "args": { + "External id": 985834,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 18734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940156979.553, "dur": 4.646, + "args": { + "External id": 985835,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940156991.392, "dur": 3.344, + "args": { + "External id": 985836,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940157000.732, "dur": 4.349, + "args": { + "External id": 985837,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940157224.912, "dur": 5.528, + "args": { + "External id": 985838,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940157226.357, "dur": 3.694, + "args": { + "External id": 985839,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940157233.911, "dur": 2.735, + "args": { + "External id": 985840,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940157235.230, "dur": 1.286, + "args": { + "External id": 985841,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940157275.077, "dur": 124.221, + "args": { + "External id": 985842,"Sequence number": 10552584, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 18742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940157276.153, "dur": 12.241, + "args": { + "External id": 985843,"Sequence number": 10552584, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 18743 + } + }, + { + "ph": "s", "id": 335, "pid": 2338710, "tid": 2338710, "ts": 6345940157276.153, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940157279.693, "dur": 7.370, + "args": { + "External id": 985844,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 18744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940157284.860, "dur": 1.858, + "args": { + "External id": 985845,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 18745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940157289.726, "dur": 109.181, + "args": { + "External id": 985846,"Sequence number": 10552585, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 18746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940157291.686, "dur": 7.575, + "args": { + "External id": 985847,"Sequence number": 10552585, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940157292.740, "dur": 6.356, + "args": { + "External id": 985848,"Sequence number": 10552585, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18748 + } + }, + { + "ph": "s", "id": 334, "pid": 2338710, "tid": 2338710, "ts": 6345940157292.740, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940157300.312, "dur": 90.095, + "args": { + "External id": 985849,"Sequence number": 10552586, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 18749 + } + }, + { + "ph": "s", "id": 333, "pid": 2338710, "tid": 2338710, "ts": 6345940157300.312, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345940157393.020, "dur": 5.147, + "args": { + "External id": 985850,"Sequence number": 10552587, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 18750 + } + }, + { + "ph": "s", "id": 332, "pid": 2338710, "tid": 2338710, "ts": 6345940157393.020, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940157408.802, "dur": 78.729, + "args": { + "External id": 985851,"Sequence number": 10552588, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 18751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940157409.447, "dur": 6.820, + "args": { + "External id": 985852,"Sequence number": 10552588, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 18752 + } + }, + { + "ph": "s", "id": 331, "pid": 2338710, "tid": 2338710, "ts": 6345940157409.447, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940157411.447, "dur": 3.385, + "args": { + "External id": 985853,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 18753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940157413.723, "dur": 0.893, + "args": { + "External id": 985854,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 18754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940157420.036, "dur": 67.219, + "args": { + "External id": 985855,"Sequence number": 10552589, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 18755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940157421.201, "dur": 5.889, + "args": { + "External id": 985856,"Sequence number": 10552589, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940157422.340, "dur": 4.595, + "args": { + "External id": 985857,"Sequence number": 10552589, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18757 + } + }, + { + "ph": "s", "id": 330, "pid": 2338710, "tid": 2338710, "ts": 6345940157422.340, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940157427.664, "dur": 53.183, + "args": { + "External id": 985858,"Sequence number": 10552590, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 18758 + } + }, + { + "ph": "s", "id": 329, "pid": 2338710, "tid": 2338710, "ts": 6345940157427.664, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345940157482.731, "dur": 4.109, + "args": { + "External id": 985859,"Sequence number": 10552591, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 18759 + } + }, + { + "ph": "s", "id": 328, "pid": 2338710, "tid": 2338710, "ts": 6345940157482.731, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345940157512.098, "dur": 177.417, + "args": { + "External id": 985860,"Sequence number": 10552592, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [8, 4096, 14336], [4096, 14336], []], "Ev Idx": 18760 + } + }, + { + "ph": "s", "id": 327, "pid": 2338710, "tid": 2338710, "ts": 6345940157512.098, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940157555.763, "dur": 5.458, + "args": { + "External id": 985861,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940157602.001, "dur": 72.423, + "args": { + "External id": 985862,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [4096, 14336], []], "Ev Idx": 18762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940157602.994, "dur": 9.472, + "args": { + "External id": 985863,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 18763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940157604.377, "dur": 7.118, + "args": { + "External id": 985864,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 18764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940157610.264, "dur": 1.032, + "args": { + "External id": 985865,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 18765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940157613.460, "dur": 60.518, + "args": { + "External id": 985866,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[58720256, 14336, 1], [1, 14336]], "Input Dims": [[8, 4096, 14336], [14336, 4096]], "Ev Idx": 18766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940157614.711, "dur": 2.807, + "args": { + "External id": 985867,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 18767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940157616.045, "dur": 1.329, + "args": { + "External id": 985868,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 18768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940157618.295, "dur": 51.756, + "args": { + "External id": 985869,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 18769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345940157672.036, "dur": 1.330, + "args": { + "External id": 985870,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338710, "tid": 2338710, + "ts": 6345940157699.081, "dur": 33.209, + "args": { + "External id": 985871,"Sequence number": 10552593, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 18771 + } + }, + { + "ph": "s", "id": 326, "pid": 2338710, "tid": 2338710, "ts": 6345940157699.081, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345940157773.849, "dur": 211.389, + "args": { + "External id": 985872,"Sequence number": 10552594, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [], [], [], [], []], "Ev Idx": 18772 + } + }, + { + "ph": "s", "id": 325, "pid": 2338710, "tid": 2338710, "ts": 6345940157773.849, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940157793.799, "dur": 4.157, + "args": { + "External id": 985873,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940157795.152, "dur": 2.441, + "args": { + "External id": 985874,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940157807.113, "dur": 7.970, + "args": { + "External id": 985875,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 18775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940157810.170, "dur": 4.535, + "args": { + "External id": 985876,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940157822.684, "dur": 6.027, + "args": { + "External id": 985877,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940157967.891, "dur": 4.005, + "args": { + "External id": 985878,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940157969.356, "dur": 2.361, + "args": { + "External id": 985879,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940158026.053, "dur": 166.987, + "args": { + "External id": 985880,"Sequence number": 10552595, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 18780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940158028.274, "dur": 10.407, + "args": { + "External id": 985881,"Sequence number": 10552595, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 18781 + } + }, + { + "ph": "s", "id": 324, "pid": 2338710, "tid": 2338710, "ts": 6345940158028.274, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940158031.703, "dur": 5.029, + "args": { + "External id": 985882,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 18782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940158034.490, "dur": 1.781, + "args": { + "External id": 985883,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 18783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940158039.855, "dur": 152.772, + "args": { + "External id": 985884,"Sequence number": 10552596, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 18784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940158043.962, "dur": 6.022, + "args": { + "External id": 985885,"Sequence number": 10552596, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940158045.195, "dur": 4.505, + "args": { + "External id": 985886,"Sequence number": 10552596, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18786 + } + }, + { + "ph": "s", "id": 323, "pid": 2338710, "tid": 2338710, "ts": 6345940158045.195, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940158051.089, "dur": 130.321, + "args": { + "External id": 985887,"Sequence number": 10552597, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 18787 + } + }, + { + "ph": "s", "id": 322, "pid": 2338710, "tid": 2338710, "ts": 6345940158051.089, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345940158185.876, "dur": 5.820, + "args": { + "External id": 985888,"Sequence number": 10552598, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18788 + } + }, + { + "ph": "s", "id": 321, "pid": 2338710, "tid": 2338710, "ts": 6345940158185.876, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940158208.550, "dur": 77.725, + "args": { + "External id": 985889,"Sequence number": 10552599, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 18789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940158209.528, "dur": 9.592, + "args": { + "External id": 985890,"Sequence number": 10552599, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 18790 + } + }, + { + "ph": "s", "id": 320, "pid": 2338710, "tid": 2338710, "ts": 6345940158209.528, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940158214.229, "dur": 3.404, + "args": { + "External id": 985891,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 18791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940158216.159, "dur": 1.177, + "args": { + "External id": 985892,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 18792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940158220.176, "dur": 65.760, + "args": { + "External id": 985893,"Sequence number": 10552600, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 18793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940158224.635, "dur": 6.524, + "args": { + "External id": 985894,"Sequence number": 10552600, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940158225.789, "dur": 5.181, + "args": { + "External id": 985895,"Sequence number": 10552600, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18795 + } + }, + { + "ph": "s", "id": 319, "pid": 2338710, "tid": 2338710, "ts": 6345940158225.789, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940158231.986, "dur": 48.908, + "args": { + "External id": 985896,"Sequence number": 10552601, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 18796 + } + }, + { + "ph": "s", "id": 318, "pid": 2338710, "tid": 2338710, "ts": 6345940158231.986, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345940158282.981, "dur": 2.535, + "args": { + "External id": 985897,"Sequence number": 10552602, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 18797 + } + }, + { + "ph": "s", "id": 317, "pid": 2338710, "tid": 2338710, "ts": 6345940158282.981, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940158295.110, "dur": 69.100, + "args": { + "External id": 985898,"Sequence number": 10552603, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 18798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940158295.603, "dur": 9.947, + "args": { + "External id": 985899,"Sequence number": 10552603, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 18799 + } + }, + { + "ph": "s", "id": 316, "pid": 2338710, "tid": 2338710, "ts": 6345940158295.603, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940158299.393, "dur": 4.955, + "args": { + "External id": 985900,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 18800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940158303.376, "dur": 0.807, + "args": { + "External id": 985901,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 18801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940158306.234, "dur": 57.670, + "args": { + "External id": 985902,"Sequence number": 10552604, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 18802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940158307.323, "dur": 9.107, + "args": { + "External id": 985903,"Sequence number": 10552604, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940158311.106, "dur": 5.140, + "args": { + "External id": 985904,"Sequence number": 10552604, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18804 + } + }, + { + "ph": "s", "id": 315, "pid": 2338710, "tid": 2338710, "ts": 6345940158311.106, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940158317.248, "dur": 39.685, + "args": { + "External id": 985905,"Sequence number": 10552605, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 18805 + } + }, + { + "ph": "s", "id": 314, "pid": 2338710, "tid": 2338710, "ts": 6345940158317.248, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345940158358.630, "dur": 4.890, + "args": { + "External id": 985906,"Sequence number": 10552606, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 18806 + } + }, + { + "ph": "s", "id": 313, "pid": 2338710, "tid": 2338710, "ts": 6345940158358.630, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940158384.077, "dur": 6.688, + "args": { + "External id": 985907,"Sequence number": 10552607, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940158385.077, "dur": 5.486, + "args": { + "External id": 985908,"Sequence number": 10552607, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18808 + } + }, + { + "ph": "s", "id": 312, "pid": 2338710, "tid": 2338710, "ts": 6345940158385.077, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940158399.606, "dur": 4.349, + "args": { + "External id": 985909,"Sequence number": 10552608, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940158401.197, "dur": 2.609, + "args": { + "External id": 985910,"Sequence number": 10552608, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18810 + } + }, + { + "ph": "s", "id": 311, "pid": 2338710, "tid": 2338710, "ts": 6345940158401.197, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940158408.930, "dur": 5.755, + "args": { + "External id": 985911,"Sequence number": 10552609, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940158410.112, "dur": 4.436, + "args": { + "External id": 985912,"Sequence number": 10552609, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18812 + } + }, + { + "ph": "s", "id": 310, "pid": 2338710, "tid": 2338710, "ts": 6345940158410.112, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345940158453.138, "dur": 191.966, + "args": { + "External id": 985913,"Sequence number": 10552610, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 18813 + } + }, + { + "ph": "s", "id": 309, "pid": 2338710, "tid": 2338710, "ts": 6345940158453.138, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940158477.587, "dur": 10.937, + "args": { + "External id": 985914,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940158481.121, "dur": 6.989, + "args": { + "External id": 985915,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345940158660.249, "dur": 123.849, + "args": { + "External id": 985916,"Sequence number": 10552611, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 18816 + } + }, + { + "ph": "s", "id": 308, "pid": 2338710, "tid": 2338710, "ts": 6345940158660.249, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940158676.121, "dur": 8.597, + "args": { + "External id": 985917,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 18817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940158679.155, "dur": 5.141, + "args": { + "External id": 985918,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 2338710, "tid": 2338710, + "ts": 6345940158819.925, "dur": 284.281, + "args": { + "External id": 985919,"Sequence number": 10552612, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], []], "Ev Idx": 18819 + } + }, + { + "ph": "s", "id": 307, "pid": 2338710, "tid": 2338710, "ts": 6345940158819.925, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345940158851.621, "dur": 175.612, + "args": { + "External id": 985920,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940158912.165, "dur": 9.301, + "args": { + "External id": 985921,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940158916.152, "dur": 4.553, + "args": { + "External id": 985922,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940158924.629, "dur": 6.320, + "args": { + "External id": 985923,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940158932.416, "dur": 1.268, + "args": { + "External id": 985924,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940158936.925, "dur": 5.391, + "args": { + "External id": 985925,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338710, "tid": 2338710, + "ts": 6345940159044.172, "dur": 6.198, + "args": { + "External id": 985926,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 18826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940159113.588, "dur": 8.308, + "args": { + "External id": 985927,"Sequence number": 10552613, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 18827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940159115.468, "dur": 6.252, + "args": { + "External id": 985928,"Sequence number": 10552613, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 18828 + } + }, + { + "ph": "s", "id": 306, "pid": 2338710, "tid": 2338710, "ts": 6345940159115.468, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940159137.548, "dur": 156.988, + "args": { + "External id": 985929,"Sequence number": 10552614, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 18829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940159139.430, "dur": 15.598, + "args": { + "External id": 985930,"Sequence number": 10552614, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 18830 + } + }, + { + "ph": "s", "id": 305, "pid": 2338710, "tid": 2338710, "ts": 6345940159139.430, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940159145.825, "dur": 7.765, + "args": { + "External id": 985931,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 18831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940159151.116, "dur": 2.144, + "args": { + "External id": 985932,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 18832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940159156.430, "dur": 137.779, + "args": { + "External id": 985933,"Sequence number": 10552615, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 18833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940159158.768, "dur": 4.572, + "args": { + "External id": 985934,"Sequence number": 10552615, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940159160.356, "dur": 2.797, + "args": { + "External id": 985935,"Sequence number": 10552615, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18835 + } + }, + { + "ph": "s", "id": 304, "pid": 2338710, "tid": 2338710, "ts": 6345940159160.356, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940159164.502, "dur": 121.014, + "args": { + "External id": 985936,"Sequence number": 10552616, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 18836 + } + }, + { + "ph": "s", "id": 303, "pid": 2338710, "tid": 2338710, "ts": 6345940159164.502, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345940159288.508, "dur": 4.860, + "args": { + "External id": 985937,"Sequence number": 10552617, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18837 + } + }, + { + "ph": "s", "id": 302, "pid": 2338710, "tid": 2338710, "ts": 6345940159288.508, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345940159339.329, "dur": 254.638, + "args": { + "External id": 985938,"Sequence number": 10552618, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [8, 4096, 4096], [], [], [], []], "Ev Idx": 18838 + } + }, + { + "ph": "s", "id": 301, "pid": 2338710, "tid": 2338710, "ts": 6345940159339.329, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940159361.672, "dur": 3.166, + "args": { + "External id": 985939,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940159362.564, "dur": 2.091, + "args": { + "External id": 985940,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 2338710, "tid": 2338710, + "ts": 6345940159369.660, "dur": 6.712, + "args": { + "External id": 985941,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [4096, 1]], "Input Dims": [[8, 4096, 4096], [32768, 4096]], "Ev Idx": 18841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940159373.511, "dur": 2.710, + "args": { + "External id": 985942,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940159374.948, "dur": 1.143, + "args": { + "External id": 985943,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940159385.448, "dur": 10.183, + "args": { + "External id": 985944,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 18844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940159388.690, "dur": 6.576, + "args": { + "External id": 985945,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940159403.085, "dur": 6.789, + "args": { + "External id": 985946,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940159416.181, "dur": 3.597, + "args": { + "External id": 985947,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940159570.854, "dur": 5.785, + "args": { + "External id": 985948,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940159572.434, "dur": 3.955, + "args": { + "External id": 985949,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940159579.464, "dur": 2.656, + "args": { + "External id": 985950,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940159580.601, "dur": 1.396, + "args": { + "External id": 985951,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940159615.214, "dur": 112.240, + "args": { + "External id": 985952,"Sequence number": 10552619, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 18852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940159616.994, "dur": 11.480, + "args": { + "External id": 985953,"Sequence number": 10552619, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 18853 + } + }, + { + "ph": "s", "id": 300, "pid": 2338710, "tid": 2338710, "ts": 6345940159616.994, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940159622.740, "dur": 4.407, + "args": { + "External id": 985954,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 18854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940159625.431, "dur": 1.481, + "args": { + "External id": 985955,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 18855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940159629.901, "dur": 97.261, + "args": { + "External id": 985956,"Sequence number": 10552620, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 18856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940159632.205, "dur": 7.274, + "args": { + "External id": 985957,"Sequence number": 10552620, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940159633.163, "dur": 6.144, + "args": { + "External id": 985958,"Sequence number": 10552620, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18858 + } + }, + { + "ph": "s", "id": 299, "pid": 2338710, "tid": 2338710, "ts": 6345940159633.163, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940159640.714, "dur": 79.540, + "args": { + "External id": 985959,"Sequence number": 10552621, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 18859 + } + }, + { + "ph": "s", "id": 298, "pid": 2338710, "tid": 2338710, "ts": 6345940159640.714, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345940159722.853, "dur": 3.681, + "args": { + "External id": 985960,"Sequence number": 10552622, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 18860 + } + }, + { + "ph": "s", "id": 297, "pid": 2338710, "tid": 2338710, "ts": 6345940159722.853, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940159739.922, "dur": 81.656, + "args": { + "External id": 985961,"Sequence number": 10552623, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 18861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940159740.658, "dur": 10.247, + "args": { + "External id": 985962,"Sequence number": 10552623, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 18862 + } + }, + { + "ph": "s", "id": 296, "pid": 2338710, "tid": 2338710, "ts": 6345940159740.658, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940159742.841, "dur": 6.820, + "args": { + "External id": 985963,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 18863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940159748.767, "dur": 0.715, + "args": { + "External id": 985964,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 18864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940159752.051, "dur": 69.253, + "args": { + "External id": 985965,"Sequence number": 10552624, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 18865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940159753.132, "dur": 5.485, + "args": { + "External id": 985966,"Sequence number": 10552624, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940159754.288, "dur": 4.170, + "args": { + "External id": 985967,"Sequence number": 10552624, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18867 + } + }, + { + "ph": "s", "id": 295, "pid": 2338710, "tid": 2338710, "ts": 6345940159754.288, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940159761.536, "dur": 50.572, + "args": { + "External id": 985968,"Sequence number": 10552625, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 18868 + } + }, + { + "ph": "s", "id": 294, "pid": 2338710, "tid": 2338710, "ts": 6345940159761.536, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345940159814.107, "dur": 6.507, + "args": { + "External id": 985969,"Sequence number": 10552626, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 18869 + } + }, + { + "ph": "s", "id": 293, "pid": 2338710, "tid": 2338710, "ts": 6345940159814.107, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345940159844.916, "dur": 190.906, + "args": { + "External id": 985970,"Sequence number": 10552627, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [8, 4096, 14336], [4096, 14336], []], "Ev Idx": 18870 + } + }, + { + "ph": "s", "id": 292, "pid": 2338710, "tid": 2338710, "ts": 6345940159844.916, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940159889.197, "dur": 5.124, + "args": { + "External id": 985971,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940159932.129, "dur": 67.235, + "args": { + "External id": 985972,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [4096, 14336], []], "Ev Idx": 18872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940159933.125, "dur": 5.315, + "args": { + "External id": 985973,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 18873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940159934.452, "dur": 3.075, + "args": { + "External id": 985974,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 18874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940159936.433, "dur": 0.840, + "args": { + "External id": 985975,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 18875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940159939.240, "dur": 59.708, + "args": { + "External id": 985976,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[58720256, 14336, 1], [1, 14336]], "Input Dims": [[8, 4096, 14336], [14336, 4096]], "Ev Idx": 18876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940159940.798, "dur": 2.845, + "args": { + "External id": 985977,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 18877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940159942.200, "dur": 1.317, + "args": { + "External id": 985978,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 18878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940159946.762, "dur": 47.853, + "args": { + "External id": 985979,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 18879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345940159997.059, "dur": 1.223, + "args": { + "External id": 985980,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338710, "tid": 2338710, + "ts": 6345940160048.646, "dur": 73.825, + "args": { + "External id": 985981,"Sequence number": 10552628, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 18881 + } + }, + { + "ph": "s", "id": 291, "pid": 2338710, "tid": 2338710, "ts": 6345940160048.646, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345940160169.800, "dur": 218.590, + "args": { + "External id": 985982,"Sequence number": 10552629, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [], [], [], [], []], "Ev Idx": 18882 + } + }, + { + "ph": "s", "id": 290, "pid": 2338710, "tid": 2338710, "ts": 6345940160169.800, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940160192.931, "dur": 4.471, + "args": { + "External id": 985983,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940160194.356, "dur": 2.893, + "args": { + "External id": 985984,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940160206.816, "dur": 10.582, + "args": { + "External id": 985985,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 18885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940160212.413, "dur": 4.581, + "args": { + "External id": 985986,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940160224.731, "dur": 6.918, + "args": { + "External id": 985987,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940160371.012, "dur": 5.281, + "args": { + "External id": 985988,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940160374.143, "dur": 1.827, + "args": { + "External id": 985989,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940160408.661, "dur": 108.788, + "args": { + "External id": 985990,"Sequence number": 10552630, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 18890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940160409.751, "dur": 11.533, + "args": { + "External id": 985991,"Sequence number": 10552630, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 18891 + } + }, + { + "ph": "s", "id": 289, "pid": 2338710, "tid": 2338710, "ts": 6345940160409.751, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940160412.497, "dur": 7.249, + "args": { + "External id": 985992,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 18892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940160417.733, "dur": 1.799, + "args": { + "External id": 985993,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 18893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940160422.572, "dur": 94.546, + "args": { + "External id": 985994,"Sequence number": 10552631, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 18894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940160424.915, "dur": 4.435, + "args": { + "External id": 985995,"Sequence number": 10552631, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940160425.420, "dur": 3.770, + "args": { + "External id": 985996,"Sequence number": 10552631, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18896 + } + }, + { + "ph": "s", "id": 288, "pid": 2338710, "tid": 2338710, "ts": 6345940160425.420, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940160430.116, "dur": 79.811, + "args": { + "External id": 985997,"Sequence number": 10552632, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 18897 + } + }, + { + "ph": "s", "id": 287, "pid": 2338710, "tid": 2338710, "ts": 6345940160430.116, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345940160512.663, "dur": 3.619, + "args": { + "External id": 985998,"Sequence number": 10552633, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18898 + } + }, + { + "ph": "s", "id": 286, "pid": 2338710, "tid": 2338710, "ts": 6345940160512.663, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940160526.788, "dur": 73.478, + "args": { + "External id": 985999,"Sequence number": 10552634, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 18899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940160527.423, "dur": 6.574, + "args": { + "External id": 986000,"Sequence number": 10552634, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 18900 + } + }, + { + "ph": "s", "id": 285, "pid": 2338710, "tid": 2338710, "ts": 6345940160527.423, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940160529.985, "dur": 2.795, + "args": { + "External id": 986001,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 18901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940160531.643, "dur": 0.943, + "args": { + "External id": 986002,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 18902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940160537.219, "dur": 62.765, + "args": { + "External id": 986003,"Sequence number": 10552635, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 18903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940160538.349, "dur": 5.975, + "args": { + "External id": 986004,"Sequence number": 10552635, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940160539.898, "dur": 4.289, + "args": { + "External id": 986005,"Sequence number": 10552635, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18905 + } + }, + { + "ph": "s", "id": 284, "pid": 2338710, "tid": 2338710, "ts": 6345940160539.898, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940160545.363, "dur": 44.613, + "args": { + "External id": 986006,"Sequence number": 10552636, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 18906 + } + }, + { + "ph": "s", "id": 283, "pid": 2338710, "tid": 2338710, "ts": 6345940160545.363, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345940160592.066, "dur": 7.359, + "args": { + "External id": 986007,"Sequence number": 10552637, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 18907 + } + }, + { + "ph": "s", "id": 282, "pid": 2338710, "tid": 2338710, "ts": 6345940160592.066, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940160611.081, "dur": 64.731, + "args": { + "External id": 986008,"Sequence number": 10552638, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 18908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940160611.685, "dur": 5.519, + "args": { + "External id": 986009,"Sequence number": 10552638, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 18909 + } + }, + { + "ph": "s", "id": 281, "pid": 2338710, "tid": 2338710, "ts": 6345940160611.685, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940160613.316, "dur": 2.647, + "args": { + "External id": 986010,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 18910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940160615.247, "dur": 0.528, + "args": { + "External id": 986011,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 18911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940160618.111, "dur": 57.471, + "args": { + "External id": 986012,"Sequence number": 10552639, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 18912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940160622.283, "dur": 5.454, + "args": { + "External id": 986013,"Sequence number": 10552639, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940160623.275, "dur": 4.276, + "args": { + "External id": 986014,"Sequence number": 10552639, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18914 + } + }, + { + "ph": "s", "id": 280, "pid": 2338710, "tid": 2338710, "ts": 6345940160623.275, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940160628.439, "dur": 41.495, + "args": { + "External id": 986015,"Sequence number": 10552640, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 18915 + } + }, + { + "ph": "s", "id": 279, "pid": 2338710, "tid": 2338710, "ts": 6345940160628.439, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345940160672.443, "dur": 2.725, + "args": { + "External id": 986016,"Sequence number": 10552641, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 18916 + } + }, + { + "ph": "s", "id": 278, "pid": 2338710, "tid": 2338710, "ts": 6345940160672.443, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940160696.629, "dur": 4.831, + "args": { + "External id": 986017,"Sequence number": 10552642, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940160698.104, "dur": 3.213, + "args": { + "External id": 986018,"Sequence number": 10552642, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18918 + } + }, + { + "ph": "s", "id": 277, "pid": 2338710, "tid": 2338710, "ts": 6345940160698.104, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940160708.972, "dur": 4.194, + "args": { + "External id": 986019,"Sequence number": 10552643, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940160710.322, "dur": 2.705, + "args": { + "External id": 986020,"Sequence number": 10552643, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18920 + } + }, + { + "ph": "s", "id": 276, "pid": 2338710, "tid": 2338710, "ts": 6345940160710.322, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940160718.068, "dur": 7.316, + "args": { + "External id": 986021,"Sequence number": 10552644, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940160719.704, "dur": 5.517, + "args": { + "External id": 986022,"Sequence number": 10552644, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18922 + } + }, + { + "ph": "s", "id": 275, "pid": 2338710, "tid": 2338710, "ts": 6345940160719.704, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345940160756.996, "dur": 160.875, + "args": { + "External id": 986023,"Sequence number": 10552645, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 18923 + } + }, + { + "ph": "s", "id": 274, "pid": 2338710, "tid": 2338710, "ts": 6345940160756.996, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940160777.809, "dur": 8.891, + "args": { + "External id": 986024,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940160780.525, "dur": 5.626, + "args": { + "External id": 986025,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345940160931.141, "dur": 177.886, + "args": { + "External id": 986026,"Sequence number": 10552646, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 18926 + } + }, + { + "ph": "s", "id": 273, "pid": 2338710, "tid": 2338710, "ts": 6345940160931.141, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940160945.982, "dur": 10.209, + "args": { + "External id": 986027,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 18927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940160948.641, "dur": 7.131, + "args": { + "External id": 986028,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 2338710, "tid": 2338710, + "ts": 6345940161148.184, "dur": 218.248, + "args": { + "External id": 986029,"Sequence number": 10552647, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], []], "Ev Idx": 18929 + } + }, + { + "ph": "s", "id": 272, "pid": 2338710, "tid": 2338710, "ts": 6345940161148.184, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345940161180.565, "dur": 152.632, + "args": { + "External id": 986030,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940161234.892, "dur": 11.100, + "args": { + "External id": 986031,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940161238.872, "dur": 6.325, + "args": { + "External id": 986032,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940161249.209, "dur": 4.903, + "args": { + "External id": 986033,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940161257.979, "dur": 1.307, + "args": { + "External id": 986034,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940161264.097, "dur": 6.024, + "args": { + "External id": 986035,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338710, "tid": 2338710, + "ts": 6345940161346.829, "dur": 5.931, + "args": { + "External id": 986036,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 18936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940161373.163, "dur": 7.655, + "args": { + "External id": 986037,"Sequence number": 10552648, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 18937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940161375.179, "dur": 5.467, + "args": { + "External id": 986038,"Sequence number": 10552648, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 18938 + } + }, + { + "ph": "s", "id": 271, "pid": 2338710, "tid": 2338710, "ts": 6345940161375.179, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940161395.127, "dur": 134.662, + "args": { + "External id": 986039,"Sequence number": 10552649, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 18939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940161396.690, "dur": 12.798, + "args": { + "External id": 986040,"Sequence number": 10552649, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 18940 + } + }, + { + "ph": "s", "id": 270, "pid": 2338710, "tid": 2338710, "ts": 6345940161396.690, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940161400.466, "dur": 7.816, + "args": { + "External id": 986041,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 18941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940161405.680, "dur": 2.217, + "args": { + "External id": 986042,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 18942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940161410.901, "dur": 118.392, + "args": { + "External id": 986043,"Sequence number": 10552650, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 18943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940161413.338, "dur": 4.565, + "args": { + "External id": 986044,"Sequence number": 10552650, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940161414.360, "dur": 3.397, + "args": { + "External id": 986045,"Sequence number": 10552650, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18945 + } + }, + { + "ph": "s", "id": 269, "pid": 2338710, "tid": 2338710, "ts": 6345940161414.360, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940161421.382, "dur": 99.030, + "args": { + "External id": 986046,"Sequence number": 10552651, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 18946 + } + }, + { + "ph": "s", "id": 268, "pid": 2338710, "tid": 2338710, "ts": 6345940161421.382, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345940161523.528, "dur": 4.867, + "args": { + "External id": 986047,"Sequence number": 10552652, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18947 + } + }, + { + "ph": "s", "id": 267, "pid": 2338710, "tid": 2338710, "ts": 6345940161523.528, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345940161568.981, "dur": 257.811, + "args": { + "External id": 986048,"Sequence number": 10552653, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [8, 4096, 4096], [], [], [], []], "Ev Idx": 18948 + } + }, + { + "ph": "s", "id": 266, "pid": 2338710, "tid": 2338710, "ts": 6345940161568.981, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940161590.042, "dur": 9.494, + "args": { + "External id": 986049,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940161597.606, "dur": 1.779, + "args": { + "External id": 986050,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 2338710, "tid": 2338710, + "ts": 6345940161604.214, "dur": 3.582, + "args": { + "External id": 986051,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [4096, 1]], "Input Dims": [[8, 4096, 4096], [32768, 4096]], "Ev Idx": 18951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940161605.345, "dur": 2.310, + "args": { + "External id": 986052,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940161606.140, "dur": 1.407, + "args": { + "External id": 986053,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940161617.073, "dur": 8.497, + "args": { + "External id": 986054,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 18954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940161619.733, "dur": 5.440, + "args": { + "External id": 986055,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940161635.406, "dur": 5.792, + "args": { + "External id": 986056,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940161645.531, "dur": 4.982, + "args": { + "External id": 986057,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940161804.923, "dur": 3.791, + "args": { + "External id": 986058,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940161806.617, "dur": 1.855, + "args": { + "External id": 986059,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940161811.567, "dur": 2.833, + "args": { + "External id": 986060,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940161813.094, "dur": 1.181, + "args": { + "External id": 986061,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940161847.332, "dur": 112.998, + "args": { + "External id": 986062,"Sequence number": 10552654, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 18962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940161848.584, "dur": 12.195, + "args": { + "External id": 986063,"Sequence number": 10552654, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 18963 + } + }, + { + "ph": "s", "id": 265, "pid": 2338710, "tid": 2338710, "ts": 6345940161848.584, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940161852.786, "dur": 6.795, + "args": { + "External id": 986064,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 18964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940161855.943, "dur": 3.323, + "args": { + "External id": 986065,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 18965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940161861.860, "dur": 98.068, + "args": { + "External id": 986066,"Sequence number": 10552655, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 18966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940161863.651, "dur": 6.111, + "args": { + "External id": 986067,"Sequence number": 10552655, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940161866.641, "dur": 2.968, + "args": { + "External id": 986068,"Sequence number": 10552655, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18968 + } + }, + { + "ph": "s", "id": 264, "pid": 2338710, "tid": 2338710, "ts": 6345940161866.641, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940161871.012, "dur": 80.474, + "args": { + "External id": 986069,"Sequence number": 10552656, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 18969 + } + }, + { + "ph": "s", "id": 263, "pid": 2338710, "tid": 2338710, "ts": 6345940161871.012, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345940161954.417, "dur": 4.880, + "args": { + "External id": 986070,"Sequence number": 10552657, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 18970 + } + }, + { + "ph": "s", "id": 262, "pid": 2338710, "tid": 2338710, "ts": 6345940161954.417, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940161969.661, "dur": 166.729, + "args": { + "External id": 986071,"Sequence number": 10552658, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 18971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940161970.642, "dur": 11.966, + "args": { + "External id": 986072,"Sequence number": 10552658, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 18972 + } + }, + { + "ph": "s", "id": 261, "pid": 2338710, "tid": 2338710, "ts": 6345940161970.642, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940161978.279, "dur": 2.824, + "args": { + "External id": 986073,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 18973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940161980.228, "dur": 0.673, + "args": { + "External id": 986074,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 18974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940161983.809, "dur": 152.146, + "args": { + "External id": 986075,"Sequence number": 10552659, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 18975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940161985.073, "dur": 8.181, + "args": { + "External id": 986076,"Sequence number": 10552659, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940161986.455, "dur": 6.623, + "args": { + "External id": 986077,"Sequence number": 10552659, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18977 + } + }, + { + "ph": "s", "id": 260, "pid": 2338710, "tid": 2338710, "ts": 6345940161986.455, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940161993.815, "dur": 129.973, + "args": { + "External id": 986078,"Sequence number": 10552660, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 18978 + } + }, + { + "ph": "s", "id": 259, "pid": 2338710, "tid": 2338710, "ts": 6345940161993.815, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345940162128.552, "dur": 6.368, + "args": { + "External id": 986079,"Sequence number": 10552661, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 18979 + } + }, + { + "ph": "s", "id": 258, "pid": 2338710, "tid": 2338710, "ts": 6345940162128.552, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345940162164.720, "dur": 190.683, + "args": { + "External id": 986080,"Sequence number": 10552662, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [8, 4096, 14336], [4096, 14336], []], "Ev Idx": 18980 + } + }, + { + "ph": "s", "id": 257, "pid": 2338710, "tid": 2338710, "ts": 6345940162164.720, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940162214.573, "dur": 7.381, + "args": { + "External id": 986081,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940162258.190, "dur": 82.325, + "args": { + "External id": 986082,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [4096, 14336], []], "Ev Idx": 18982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940162259.688, "dur": 8.472, + "args": { + "External id": 986083,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 18983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940162261.199, "dur": 5.624, + "args": { + "External id": 986084,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 18984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940162263.449, "dur": 3.119, + "args": { + "External id": 986085,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 18985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940162269.084, "dur": 71.085, + "args": { + "External id": 986086,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[58720256, 14336, 1], [1, 14336]], "Input Dims": [[8, 4096, 14336], [14336, 4096]], "Ev Idx": 18986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338710, "tid": 2338710, + "ts": 6345940162271.082, "dur": 5.530, + "args": { + "External id": 986087,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 18987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940162274.922, "dur": 1.570, + "args": { + "External id": 986088,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 18988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940162277.694, "dur": 57.454, + "args": { + "External id": 986089,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 18989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338710, "tid": 2338710, + "ts": 6345940162337.911, "dur": 1.480, + "args": { + "External id": 986090,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338710, "tid": 2338710, + "ts": 6345940162366.036, "dur": 28.396, + "args": { + "External id": 986091,"Sequence number": 10552663, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 18991 + } + }, + { + "ph": "s", "id": 256, "pid": 2338710, "tid": 2338710, "ts": 6345940162366.036, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::stack", "pid": 2338710, "tid": 2338710, + "ts": 6345940162416.004, "dur": 49.279, + "args": { + "External id": 986092,"Sequence number": 10552664, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "-2"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[[16777216, 4096, 1], [16777216, 4096, 1], [16777216, 4096, 1], [16777216, 4096, 1]], []], "Input Dims": [[[8, 4096, 4096], [8, 4096, 4096], [8, 4096, 4096], [8, 4096, 4096]], []], "Ev Idx": 18992 + } + }, + { + "ph": "s", "id": 255, "pid": 2338710, "tid": 2338710, "ts": 6345940162416.004, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::cat", "pid": 2338710, "tid": 2338710, + "ts": 6345940162426.086, "dur": 34.422, + "args": { + "External id": 986093,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[[16777216, 4096, 1], [16777216, 4096, 1], [16777216, 4096, 1], [16777216, 4096, 1]], []], "Input Dims": [[[8, 4096, 4096], [8, 4096, 4096], [8, 4096, 4096], [8, 4096, 4096]], []], "Ev Idx": 18993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940162462.136, "dur": 1.522, + "args": { + "External id": 986094,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 16384], []], "Ev Idx": 18994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338710, "tid": 2338710, + "ts": 6345940162506.630, "dur": 58.599, + "args": { + "External id": 986095,"Record function id": 0, "Ev Idx": 18995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 2/0", "pid": 2338710, "tid": 2338710, + "ts": 6345940162567.825, "dur": 220.375, + "args": { + "External id": 986096,"Record function id": 0, "Ev Idx": 18996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345940162609.412, "dur": 168.967, + "args": { + "External id": 986097,"Sequence number": 10552665, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1], [67108864, 16384, 4096, 1]], "Input Dims": [[4096], [8, 4096, 4, 4096]], "Ev Idx": 18997 + } + }, + { + "ph": "s", "id": 254, "pid": 2338710, "tid": 2338710, "ts": 6345940162609.412, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338710, "tid": 2338710, + "ts": 6345940162691.828, "dur": 42.859, + "args": { + "External id": 986098,"kernel_hash": "c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/7z/c7zx2vpbs74pgqq3nrkkn5rgfu7zq6men4epnxhpm4uojx46qomi.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[131072, 4096], [131072, 4096], [4096], [131072], [], [], [], [], [], [], [], [], []], "Ev Idx": 18998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338710, "tid": 2338710, + "ts": 6345940162889.688, "dur": 45.073, + "args": { + "External id": 986099,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False"], "Input type": ["ScalarList", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 18999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940162893.380, "dur": 5.650, + "args": { + "External id": 986100,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False", ""], "Input type": ["ScalarList", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2338710, + "ts": 6345940162902.775, "dur": 31.627, + "args": { + "External id": 986101,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 19001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345940162906.994, "dur": 26.732, + "args": { + "External id": 986102,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 19002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338710, "tid": 2338710, + "ts": 6345940162940.350, "dur": 20.254, + "args": { + "External id": 986103,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False"], "Input type": ["ScalarList", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 19003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940162941.587, "dur": 2.801, + "args": { + "External id": 986104,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False", ""], "Input type": ["ScalarList", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2338710, + "ts": 6345940162945.467, "dur": 14.804, + "args": { + "External id": 986105,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 19005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345940162948.697, "dur": 11.045, + "args": { + "External id": 986106,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 19006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338710, "tid": 2338710, + "ts": 6345940162963.858, "dur": 15.560, + "args": { + "External id": 986107,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False"], "Input type": ["ScalarList", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 19007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940162964.550, "dur": 2.169, + "args": { + "External id": 986108,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False", ""], "Input type": ["ScalarList", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2338710, + "ts": 6345940162967.277, "dur": 11.827, + "args": { + "External id": 986109,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 19009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345940162968.081, "dur": 10.506, + "args": { + "External id": 986110,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 19010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345940162990.743, "dur": 0.968, + "args": { + "External id": 986111,"Sequence number": 10552666, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], [], [], [], []], "Ev Idx": 19011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 2338710, "tid": 2338710, + "ts": 6345940163000.059, "dur": 96.937, + "args": { + "External id": 986112,"Sequence number": 10552666, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "5", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 8192], [], [], []], "Ev Idx": 19012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940163051.946, "dur": 2.822, + "args": { + "External id": 986113,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 5]", "[8192, 1, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 8192], [], [], []], "Ev Idx": 19013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940163108.430, "dur": 9.249, + "args": { + "External id": 986114,"Sequence number": 10552666, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 5], [], [], [], []], "Ev Idx": 19014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940163114.244, "dur": 1.241, + "args": { + "External id": 986115,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 5]", "[8192, 1, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 5], [], [], []], "Ev Idx": 19015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940163118.986, "dur": 3.344, + "args": { + "External id": 986116,"Sequence number": 10552666, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 5], [], [], [], []], "Ev Idx": 19016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940163120.864, "dur": 0.629, + "args": { + "External id": 986117,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 5]", "[8192, 1, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 5], [], [], []], "Ev Idx": 19017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940163123.812, "dur": 3.689, + "args": { + "External id": 986118,"Sequence number": 10552666, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "1", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 5], [], [], [], []], "Ev Idx": 19018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940163126.267, "dur": 0.571, + "args": { + "External id": 986119,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 5], [], [], []], "Ev Idx": 19019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940163131.189, "dur": 5.893, + "args": { + "External id": 986120,"Sequence number": 10552666, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 4], [], [], [], []], "Ev Idx": 19020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940163133.177, "dur": 3.161, + "args": { + "External id": 986121,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 4], [], [], []], "Ev Idx": 19021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940163138.148, "dur": 3.330, + "args": { + "External id": 986122,"Sequence number": 10552666, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 4], [], [], [], []], "Ev Idx": 19022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940163140.207, "dur": 0.467, + "args": { + "External id": 986123,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 4], [], [], []], "Ev Idx": 19023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940163142.534, "dur": 3.049, + "args": { + "External id": 986124,"Sequence number": 10552666, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4096, 4], [], [], [], []], "Ev Idx": 19024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940163144.501, "dur": 0.453, + "args": { + "External id": 986125,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4096, 4], [], [], []], "Ev Idx": 19025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940163151.505, "dur": 5.835, + "args": { + "External id": 986126,"Sequence number": 10552666, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "2"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4096, 4], [], []], "Ev Idx": 19026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940163155.858, "dur": 0.520, + "args": { + "External id": 986127,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4096, 4], [], [], []], "Ev Idx": 19027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940163161.939, "dur": 2.973, + "args": { + "External id": 986128,"Sequence number": 10552666, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4, 4096], [], [], [], []], "Ev Idx": 19028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940163163.724, "dur": 0.488, + "args": { + "External id": 986129,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 19029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338710, "tid": 2338710, + "ts": 6345940163169.018, "dur": 10.663, + "args": { + "External id": 986130,"Sequence number": 10552666, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4, 4096], [], []], "Ev Idx": 19030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940163177.404, "dur": 0.656, + "args": { + "External id": 986131,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 19031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940163181.026, "dur": 2.604, + "args": { + "External id": 986132,"Sequence number": 10552666, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 19032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940163182.538, "dur": 0.401, + "args": { + "External id": 986133,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 19033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940163186.431, "dur": 7.261, + "args": { + "External id": 986134,"Sequence number": 10552666, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 19034 + } + }, + { + "ph": "s", "id": 253, "pid": 2338710, "tid": 2338710, "ts": 6345940163186.431, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940163191.124, "dur": 0.647, + "args": { + "External id": 986135,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 19035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940163197.347, "dur": 7.531, + "args": { + "External id": 986136,"Sequence number": 10552667, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 19036 + } + }, + { + "ph": "s", "id": 252, "pid": 2338710, "tid": 2338710, "ts": 6345940163197.347, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940163201.124, "dur": 2.795, + "args": { + "External id": 986137,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 19037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338710, "tid": 2338710, + "ts": 6345940163205.968, "dur": 5.492, + "args": { + "External id": 986138,"Sequence number": 10552668, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 19038 + } + }, + { + "ph": "s", "id": 251, "pid": 2338710, "tid": 2338710, "ts": 6345940163205.968, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940163209.859, "dur": 0.619, + "args": { + "External id": 986139,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 19039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940163212.585, "dur": 6.877, + "args": { + "External id": 986140,"Sequence number": 10552669, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 19040 + } + }, + { + "ph": "s", "id": 250, "pid": 2338710, "tid": 2338710, "ts": 6345940163212.585, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940163216.042, "dur": 2.345, + "args": { + "External id": 986141,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 19041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338710, "tid": 2338710, + "ts": 6345940163223.723, "dur": 48.361, + "args": { + "External id": 986142,"Sequence number": 10552670, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 19042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338710, "tid": 2338710, + "ts": 6345940163228.362, "dur": 43.416, + "args": { + "External id": 986143,"Sequence number": 10552670, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 19043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940163231.732, "dur": 8.043, + "args": { + "External id": 986144,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 19044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940163233.906, "dur": 5.098, + "args": { + "External id": 986145,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940163241.486, "dur": 29.633, + "args": { + "External id": 986146,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 19046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940163304.801, "dur": 5.371, + "args": { + "External id": 986147,"Sequence number": 10552670, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 19047 + } + }, + { + "ph": "s", "id": 249, "pid": 2338710, "tid": 2338710, "ts": 6345940163304.801, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940163312.644, "dur": 1.293, + "args": { + "External id": 986148,"Sequence number": 10552671, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 19048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345940163348.250, "dur": 129776.434, + "args": { + "External id": 986149,"Sequence number": 10552671, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16384, 1], [1], [4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768], [32000, 4096], [], [], [], [], []], "Ev Idx": 19049 + } + }, + { + "ph": "s", "id": 248, "pid": 2338710, "tid": 2338710, "ts": 6345940163348.250, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338710, "tid": 2338710, + "ts": 6345940163363.199, "dur": 31.285, + "args": { + "External id": 986150,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 19050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338710, "tid": 2338710, + "ts": 6345940163363.834, "dur": 30.395, + "args": { + "External id": 986151,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 19051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940163365.499, "dur": 7.858, + "args": { + "External id": 986152,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[16384, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 19052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940163366.899, "dur": 5.967, + "args": { + "External id": 986153,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940163374.348, "dur": 19.391, + "args": { + "External id": 986154,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [16384, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 19054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940163415.141, "dur": 27.477, + "args": { + "External id": 986155,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 19055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940163416.192, "dur": 6.758, + "args": { + "External id": 986156,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 19056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940163418.673, "dur": 3.936, + "args": { + "External id": 986157,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2338710, + "ts": 6345940163424.214, "dur": 18.155, + "args": { + "External id": 986158,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 19058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345940163425.432, "dur": 16.538, + "args": { + "External id": 986159,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 19059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940163447.348, "dur": 23.999, + "args": { + "External id": 986160,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 19060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940163448.247, "dur": 10.149, + "args": { + "External id": 986161,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 19061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940163452.051, "dur": 6.099, + "args": { + "External id": 986162,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2338710, + "ts": 6345940163458.992, "dur": 12.116, + "args": { + "External id": 986163,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345940163459.505, "dur": 11.188, + "args": { + "External id": 986164,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 19064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338710, "tid": 2338710, + "ts": 6345940163477.298, "dur": 18.084, + "args": { + "External id": 986165,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 19065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940163478.564, "dur": 4.310, + "args": { + "External id": 986166,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2338710, + "ts": 6345940163483.500, "dur": 11.603, + "args": { + "External id": 986167,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[32768]], "Ev Idx": 19067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345940163484.101, "dur": 10.555, + "args": { + "External id": 986168,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338710, "tid": 2338710, + "ts": 6345940163505.514, "dur": 31.554, + "args": { + "External id": 986169,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345940163541.959, "dur": 66.147, + "args": { + "External id": 986170,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345940163545.480, "dur": 62.113, + "args": { + "External id": 986171,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940163553.406, "dur": 0.834, + "args": { + "External id": 986172,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 19072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345940163555.752, "dur": 27.180, + "args": { + "External id": 986173,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345940163557.728, "dur": 24.911, + "args": { + "External id": 986174,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[32768], [], [], [], [], [], []], "Ev Idx": 19074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940163560.872, "dur": 3.252, + "args": { + "External id": 986175,"Record function id": 0, "Concrete Inputs": ["[32768]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940163565.118, "dur": 17.015, + "args": { + "External id": 986176,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[32768], [32768], []], "Ev Idx": 19076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338710, "tid": 2338710, + "ts": 6345940163613.383, "dur": 122510.440, + "args": { + "External id": 986177,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 19077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338710, "tid": 2338710, + "ts": 6345940163615.735, "dur": 122505.276, + "args": { + "External id": 986178,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 19078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940286149.291, "dur": 17.562, + "args": { + "External id": 986179,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940286160.475, "dur": 2.985, + "args": { + "External id": 986180,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940286178.741, "dur": 155.520, + "args": { + "External id": 986181,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940286181.936, "dur": 9.930, + "args": { + "External id": 986182,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940286185.473, "dur": 5.294, + "args": { + "External id": 986183,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940286189.173, "dur": 1.263, + "args": { + "External id": 986184,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940286194.794, "dur": 138.707, + "args": { + "External id": 986185,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940286196.921, "dur": 135.645, + "args": { + "External id": 986186,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940286341.333, "dur": 6.482, + "args": { + "External id": 986187,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940286344.208, "dur": 0.770, + "args": { + "External id": 986188,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940286362.229, "dur": 6.295, + "args": { + "External id": 986189,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940286387.702, "dur": 12.758, + "args": { + "External id": 986190,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940286391.669, "dur": 8.399, + "args": { + "External id": 986191,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940286586.214, "dur": 307.382, + "args": { + "External id": 986192,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940286593.169, "dur": 2.826, + "args": { + "External id": 986193,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940286602.653, "dur": 290.319, + "args": { + "External id": 986194,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345940286607.016, "dur": 0.977, + "args": { + "External id": 986195,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345940286611.425, "dur": 39.857, + "args": { + "External id": 986196,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345940286654.187, "dur": 7.736, + "args": { + "External id": 986197,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940286659.893, "dur": 0.881, + "args": { + "External id": 986198,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940286664.469, "dur": 38.255, + "args": { + "External id": 986199,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940286666.589, "dur": 1.721, + "args": { + "External id": 986200,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940286671.182, "dur": 31.205, + "args": { + "External id": 986201,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940286676.454, "dur": 4.480, + "args": { + "External id": 986202,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345940286708.973, "dur": 33.037, + "args": { + "External id": 986203,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345940286745.173, "dur": 21.553, + "args": { + "External id": 986204,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345940286771.027, "dur": 22.806, + "args": { + "External id": 986205,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345940286796.442, "dur": 21.231, + "args": { + "External id": 986206,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345940286820.242, "dur": 28.342, + "args": { + "External id": 986207,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940286823.389, "dur": 1.704, + "args": { + "External id": 986208,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940286828.444, "dur": 0.512, + "args": { + "External id": 986209,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345940286854.422, "dur": 18.962, + "args": { + "External id": 986210,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940286875.640, "dur": 15.815, + "args": { + "External id": 986211,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940286903.009, "dur": 2.409, + "args": { + "External id": 986212,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940286914.686, "dur": 6.363, + "args": { + "External id": 986213,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940286919.266, "dur": 0.627, + "args": { + "External id": 986214,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940287041.258, "dur": 142.114, + "args": { + "External id": 986215,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940287194.491, "dur": 13.446, + "args": { + "External id": 986216,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940287200.622, "dur": 4.794, + "args": { + "External id": 986217,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940287212.932, "dur": 41.415, + "args": { + "External id": 986218,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940287262.263, "dur": 7.612, + "args": { + "External id": 986219,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940287264.449, "dur": 4.633, + "args": { + "External id": 986220,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940287267.224, "dur": 1.532, + "args": { + "External id": 986221,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940287276.230, "dur": 63.416, + "args": { + "External id": 986222,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940287277.872, "dur": 61.129, + "args": { + "External id": 986223,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940287347.344, "dur": 22.607, + "args": { + "External id": 986224,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940287381.388, "dur": 5.748, + "args": { + "External id": 986225,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940287384.267, "dur": 0.646, + "args": { + "External id": 986226,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940287392.839, "dur": 58.582, + "args": { + "External id": 986227,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940287394.105, "dur": 4.597, + "args": { + "External id": 986228,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940287395.083, "dur": 2.869, + "args": { + "External id": 986229,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940287396.957, "dur": 0.814, + "args": { + "External id": 986230,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940287402.320, "dur": 48.527, + "args": { + "External id": 986231,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940287403.352, "dur": 46.839, + "args": { + "External id": 986232,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940287456.907, "dur": 6.626, + "args": { + "External id": 986233,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940287459.152, "dur": 2.586, + "args": { + "External id": 986234,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940287471.965, "dur": 2.412, + "args": { + "External id": 986235,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940287485.395, "dur": 11.481, + "args": { + "External id": 986236,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940287490.898, "dur": 5.641, + "args": { + "External id": 986237,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940287622.387, "dur": 234.287, + "args": { + "External id": 986238,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940287626.933, "dur": 2.070, + "args": { + "External id": 986239,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940287631.238, "dur": 224.875, + "args": { + "External id": 986240,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345940287633.105, "dur": 0.560, + "args": { + "External id": 986241,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345940287635.234, "dur": 28.013, + "args": { + "External id": 986242,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345940287665.427, "dur": 7.051, + "args": { + "External id": 986243,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940287668.350, "dur": 3.848, + "args": { + "External id": 986244,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940287673.816, "dur": 30.619, + "args": { + "External id": 986245,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940287677.769, "dur": 1.309, + "args": { + "External id": 986246,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940287680.539, "dur": 23.531, + "args": { + "External id": 986247,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940287685.945, "dur": 3.113, + "args": { + "External id": 986248,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345940287706.373, "dur": 25.942, + "args": { + "External id": 986249,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345940287734.265, "dur": 19.222, + "args": { + "External id": 986250,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345940287756.907, "dur": 15.352, + "args": { + "External id": 986251,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345940287774.119, "dur": 18.192, + "args": { + "External id": 986252,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345940287795.009, "dur": 28.233, + "args": { + "External id": 986253,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940287797.422, "dur": 2.537, + "args": { + "External id": 986254,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940287805.031, "dur": 0.881, + "args": { + "External id": 986255,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345940287824.858, "dur": 15.861, + "args": { + "External id": 986256,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940287842.092, "dur": 12.795, + "args": { + "External id": 986257,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940287865.618, "dur": 2.068, + "args": { + "External id": 986258,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940287879.379, "dur": 6.004, + "args": { + "External id": 986259,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940287882.759, "dur": 1.561, + "args": { + "External id": 986260,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940287970.659, "dur": 128.371, + "args": { + "External id": 986261,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940288108.737, "dur": 9.790, + "args": { + "External id": 986262,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940288115.206, "dur": 1.387, + "args": { + "External id": 986263,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940288120.240, "dur": 39.233, + "args": { + "External id": 986264,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940288166.386, "dur": 7.027, + "args": { + "External id": 986265,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940288168.494, "dur": 4.095, + "args": { + "External id": 986266,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940288171.329, "dur": 0.973, + "args": { + "External id": 986267,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940288177.299, "dur": 61.837, + "args": { + "External id": 986268,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940288178.833, "dur": 59.547, + "args": { + "External id": 986269,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940288246.453, "dur": 20.527, + "args": { + "External id": 986270,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940288274.926, "dur": 4.714, + "args": { + "External id": 986271,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940288277.525, "dur": 0.995, + "args": { + "External id": 986272,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940288285.429, "dur": 105.553, + "args": { + "External id": 986273,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940288290.794, "dur": 8.604, + "args": { + "External id": 986274,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940288292.054, "dur": 6.560, + "args": { + "External id": 986275,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940288297.654, "dur": 0.708, + "args": { + "External id": 986276,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940288300.573, "dur": 89.794, + "args": { + "External id": 986277,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940288301.477, "dur": 87.974, + "args": { + "External id": 986278,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940288397.702, "dur": 7.027, + "args": { + "External id": 986279,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940288400.273, "dur": 2.925, + "args": { + "External id": 986280,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940288412.285, "dur": 2.170, + "args": { + "External id": 986281,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940288427.239, "dur": 7.439, + "args": { + "External id": 986282,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940288430.057, "dur": 4.324, + "args": { + "External id": 986283,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940288550.128, "dur": 222.707, + "args": { + "External id": 986284,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940288553.518, "dur": 2.068, + "args": { + "External id": 986285,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940288557.368, "dur": 214.939, + "args": { + "External id": 986286,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345940288560.394, "dur": 0.524, + "args": { + "External id": 986287,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345940288562.478, "dur": 25.637, + "args": { + "External id": 986288,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345940288592.940, "dur": 4.094, + "args": { + "External id": 986289,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940288595.818, "dur": 0.966, + "args": { + "External id": 986290,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940288598.401, "dur": 27.692, + "args": { + "External id": 986291,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940288600.907, "dur": 1.123, + "args": { + "External id": 986292,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940288603.638, "dur": 22.034, + "args": { + "External id": 986293,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940288606.856, "dur": 3.299, + "args": { + "External id": 986294,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345940288627.990, "dur": 27.856, + "args": { + "External id": 986295,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345940288657.808, "dur": 16.480, + "args": { + "External id": 986296,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345940288677.655, "dur": 16.507, + "args": { + "External id": 986297,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345940288695.738, "dur": 15.690, + "args": { + "External id": 986298,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345940288716.275, "dur": 24.515, + "args": { + "External id": 986299,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940288718.269, "dur": 2.216, + "args": { + "External id": 986300,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940288723.262, "dur": 0.836, + "args": { + "External id": 986301,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345940288742.537, "dur": 14.033, + "args": { + "External id": 986302,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940288758.089, "dur": 13.127, + "args": { + "External id": 986303,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940288780.717, "dur": 1.851, + "args": { + "External id": 986304,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940288795.072, "dur": 4.476, + "args": { + "External id": 986305,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940288798.112, "dur": 0.490, + "args": { + "External id": 986306,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940288875.741, "dur": 62.041, + "args": { + "External id": 986307,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940288943.456, "dur": 7.278, + "args": { + "External id": 986308,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940288946.822, "dur": 2.560, + "args": { + "External id": 986309,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940288952.294, "dur": 30.885, + "args": { + "External id": 986310,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940288988.494, "dur": 7.795, + "args": { + "External id": 986311,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940288990.001, "dur": 5.559, + "args": { + "External id": 986312,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940288994.440, "dur": 0.878, + "args": { + "External id": 986313,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940288999.256, "dur": 115.078, + "args": { + "External id": 986314,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940289000.944, "dur": 112.211, + "args": { + "External id": 986315,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940289121.594, "dur": 21.127, + "args": { + "External id": 986316,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940289150.842, "dur": 5.486, + "args": { + "External id": 986317,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940289153.976, "dur": 0.974, + "args": { + "External id": 986318,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940289164.283, "dur": 58.515, + "args": { + "External id": 986319,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940289165.553, "dur": 5.134, + "args": { + "External id": 986320,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940289167.295, "dur": 2.710, + "args": { + "External id": 986321,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940289168.970, "dur": 0.816, + "args": { + "External id": 986322,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940289171.744, "dur": 50.473, + "args": { + "External id": 986323,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940289172.567, "dur": 48.924, + "args": { + "External id": 986324,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940289227.778, "dur": 4.367, + "args": { + "External id": 986325,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940289230.120, "dur": 0.646, + "args": { + "External id": 986326,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940289241.883, "dur": 1.953, + "args": { + "External id": 986327,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940289254.031, "dur": 7.361, + "args": { + "External id": 986328,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940289256.428, "dur": 4.583, + "args": { + "External id": 986329,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940289370.630, "dur": 220.558, + "args": { + "External id": 986330,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940289373.757, "dur": 5.341, + "args": { + "External id": 986331,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940289382.990, "dur": 207.608, + "args": { + "External id": 986332,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345940289384.585, "dur": 0.419, + "args": { + "External id": 986333,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345940289386.823, "dur": 25.494, + "args": { + "External id": 986334,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345940289414.020, "dur": 5.850, + "args": { + "External id": 986335,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940289418.788, "dur": 0.837, + "args": { + "External id": 986336,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940289421.180, "dur": 24.278, + "args": { + "External id": 986337,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940289422.598, "dur": 1.135, + "args": { + "External id": 986338,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940289424.983, "dur": 20.162, + "args": { + "External id": 986339,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940289428.325, "dur": 2.794, + "args": { + "External id": 986340,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345940289446.942, "dur": 23.587, + "args": { + "External id": 986341,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345940289472.035, "dur": 16.989, + "args": { + "External id": 986342,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345940289494.727, "dur": 15.530, + "args": { + "External id": 986343,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345940289511.861, "dur": 16.233, + "args": { + "External id": 986344,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345940289530.408, "dur": 26.159, + "args": { + "External id": 986345,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940289532.864, "dur": 1.784, + "args": { + "External id": 986346,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940289537.764, "dur": 2.520, + "args": { + "External id": 986347,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345940289558.233, "dur": 14.379, + "args": { + "External id": 986348,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940289576.876, "dur": 12.271, + "args": { + "External id": 986349,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940289598.757, "dur": 1.962, + "args": { + "External id": 986350,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940289611.752, "dur": 4.958, + "args": { + "External id": 986351,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940289615.068, "dur": 0.653, + "args": { + "External id": 986352,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940289690.775, "dur": 59.378, + "args": { + "External id": 986353,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940289755.844, "dur": 5.401, + "args": { + "External id": 986354,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940289759.503, "dur": 0.565, + "args": { + "External id": 986355,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940289762.579, "dur": 27.198, + "args": { + "External id": 986356,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940289795.094, "dur": 9.082, + "args": { + "External id": 986357,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940289799.347, "dur": 3.996, + "args": { + "External id": 986358,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940289801.667, "dur": 1.445, + "args": { + "External id": 986359,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940289807.072, "dur": 45.648, + "args": { + "External id": 986360,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940289808.444, "dur": 43.552, + "args": { + "External id": 986361,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940289857.118, "dur": 15.420, + "args": { + "External id": 986362,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940289879.014, "dur": 6.204, + "args": { + "External id": 986363,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940289883.237, "dur": 0.780, + "args": { + "External id": 986364,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940289889.509, "dur": 55.544, + "args": { + "External id": 986365,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940289890.438, "dur": 3.989, + "args": { + "External id": 986366,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940289891.198, "dur": 2.576, + "args": { + "External id": 986367,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940289892.851, "dur": 0.767, + "args": { + "External id": 986368,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940289895.238, "dur": 49.423, + "args": { + "External id": 986369,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940289895.867, "dur": 48.123, + "args": { + "External id": 986370,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940289952.788, "dur": 4.146, + "args": { + "External id": 986371,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940289955.079, "dur": 0.575, + "args": { + "External id": 986372,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940289963.090, "dur": 1.487, + "args": { + "External id": 986373,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940289973.759, "dur": 7.382, + "args": { + "External id": 986374,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940289976.091, "dur": 4.708, + "args": { + "External id": 986375,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940290146.161, "dur": 213.434, + "args": { + "External id": 986376,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940290150.966, "dur": 3.868, + "args": { + "External id": 986377,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940290157.018, "dur": 202.130, + "args": { + "External id": 986378,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345940290158.791, "dur": 0.524, + "args": { + "External id": 986379,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345940290160.979, "dur": 25.164, + "args": { + "External id": 986380,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345940290188.508, "dur": 6.176, + "args": { + "External id": 986381,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940290193.459, "dur": 0.898, + "args": { + "External id": 986382,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940290195.787, "dur": 27.755, + "args": { + "External id": 986383,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940290197.660, "dur": 1.646, + "args": { + "External id": 986384,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940290200.346, "dur": 22.892, + "args": { + "External id": 986385,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940290206.187, "dur": 3.626, + "args": { + "External id": 986386,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345940290225.238, "dur": 25.888, + "args": { + "External id": 986387,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345940290253.088, "dur": 15.889, + "args": { + "External id": 986388,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345940290272.926, "dur": 14.813, + "args": { + "External id": 986389,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345940290289.214, "dur": 13.252, + "args": { + "External id": 986390,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345940290305.360, "dur": 22.083, + "args": { + "External id": 986391,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940290307.793, "dur": 1.765, + "args": { + "External id": 986392,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940290312.152, "dur": 0.593, + "args": { + "External id": 986393,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345940290331.756, "dur": 13.078, + "args": { + "External id": 986394,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940290346.340, "dur": 11.792, + "args": { + "External id": 986395,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940290367.873, "dur": 2.573, + "args": { + "External id": 986396,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940290381.870, "dur": 5.001, + "args": { + "External id": 986397,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940290385.204, "dur": 0.657, + "args": { + "External id": 986398,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940290462.292, "dur": 67.117, + "args": { + "External id": 986399,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940290534.963, "dur": 7.442, + "args": { + "External id": 986400,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940290540.427, "dur": 0.745, + "args": { + "External id": 986401,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940290544.098, "dur": 26.671, + "args": { + "External id": 986402,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940290575.611, "dur": 14.089, + "args": { + "External id": 986403,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940290577.299, "dur": 11.492, + "args": { + "External id": 986404,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940290586.542, "dur": 1.972, + "args": { + "External id": 986405,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940290592.984, "dur": 48.084, + "args": { + "External id": 986406,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940290594.583, "dur": 45.825, + "args": { + "External id": 986407,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940290647.934, "dur": 16.044, + "args": { + "External id": 986408,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940290671.081, "dur": 4.427, + "args": { + "External id": 986409,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940290673.972, "dur": 0.524, + "args": { + "External id": 986410,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940290680.274, "dur": 52.593, + "args": { + "External id": 986411,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940290681.084, "dur": 6.635, + "args": { + "External id": 986412,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940290681.828, "dur": 5.238, + "args": { + "External id": 986413,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940290686.070, "dur": 0.833, + "args": { + "External id": 986414,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940290688.419, "dur": 44.038, + "args": { + "External id": 986415,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940290689.145, "dur": 42.612, + "args": { + "External id": 986416,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940290738.342, "dur": 4.426, + "args": { + "External id": 986417,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940290740.477, "dur": 0.777, + "args": { + "External id": 986418,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940290749.185, "dur": 1.563, + "args": { + "External id": 986419,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940290759.768, "dur": 12.171, + "args": { + "External id": 986420,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940290764.573, "dur": 7.041, + "args": { + "External id": 986421,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940290874.639, "dur": 265.510, + "args": { + "External id": 986422,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940290877.121, "dur": 2.072, + "args": { + "External id": 986423,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940290881.908, "dur": 257.555, + "args": { + "External id": 986424,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345940290883.032, "dur": 0.535, + "args": { + "External id": 986425,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345940290885.212, "dur": 22.096, + "args": { + "External id": 986426,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345940290909.429, "dur": 5.186, + "args": { + "External id": 986427,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940290912.339, "dur": 2.003, + "args": { + "External id": 986428,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940290918.585, "dur": 22.720, + "args": { + "External id": 986429,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940290919.941, "dur": 1.324, + "args": { + "External id": 986430,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940290922.580, "dur": 18.403, + "args": { + "External id": 986431,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940290925.836, "dur": 2.359, + "args": { + "External id": 986432,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345940290945.177, "dur": 20.712, + "args": { + "External id": 986433,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345940290967.575, "dur": 14.212, + "args": { + "External id": 986434,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345940290985.482, "dur": 14.097, + "args": { + "External id": 986435,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345940291001.117, "dur": 33.546, + "args": { + "External id": 986436,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345940291038.242, "dur": 69.166, + "args": { + "External id": 986437,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940291042.749, "dur": 2.323, + "args": { + "External id": 986438,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940291050.859, "dur": 1.114, + "args": { + "External id": 986439,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345940291110.789, "dur": 13.815, + "args": { + "External id": 986440,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940291126.174, "dur": 11.765, + "args": { + "External id": 986441,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940291152.003, "dur": 2.948, + "args": { + "External id": 986442,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940291166.731, "dur": 4.441, + "args": { + "External id": 986443,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940291169.479, "dur": 0.687, + "args": { + "External id": 986444,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940291259.123, "dur": 71.225, + "args": { + "External id": 986445,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940291336.427, "dur": 8.013, + "args": { + "External id": 986446,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940291342.442, "dur": 0.637, + "args": { + "External id": 986447,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940291345.987, "dur": 29.112, + "args": { + "External id": 986448,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940291380.337, "dur": 6.678, + "args": { + "External id": 986449,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940291382.323, "dur": 3.888, + "args": { + "External id": 986450,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940291384.799, "dur": 1.151, + "args": { + "External id": 986451,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940291390.239, "dur": 51.406, + "args": { + "External id": 986452,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940291394.436, "dur": 46.287, + "args": { + "External id": 986453,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940291446.542, "dur": 15.962, + "args": { + "External id": 986454,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940291469.612, "dur": 4.635, + "args": { + "External id": 986455,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940291472.679, "dur": 0.571, + "args": { + "External id": 986456,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940291478.949, "dur": 53.825, + "args": { + "External id": 986457,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940291479.988, "dur": 7.099, + "args": { + "External id": 986458,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940291481.403, "dur": 5.007, + "args": { + "External id": 986459,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940291485.515, "dur": 0.738, + "args": { + "External id": 986460,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940291488.050, "dur": 44.340, + "args": { + "External id": 986461,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940291488.831, "dur": 42.891, + "args": { + "External id": 986462,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940291537.543, "dur": 4.966, + "args": { + "External id": 986463,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940291540.201, "dur": 0.780, + "args": { + "External id": 986464,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940291548.831, "dur": 1.731, + "args": { + "External id": 986465,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940291562.286, "dur": 6.661, + "args": { + "External id": 986466,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940291564.324, "dur": 4.237, + "args": { + "External id": 986467,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940291669.548, "dur": 208.739, + "args": { + "External id": 986468,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940291673.141, "dur": 2.177, + "args": { + "External id": 986469,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940291677.045, "dur": 200.764, + "args": { + "External id": 986470,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345940291678.552, "dur": 0.364, + "args": { + "External id": 986471,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345940291686.277, "dur": 22.980, + "args": { + "External id": 986472,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345940291713.662, "dur": 4.653, + "args": { + "External id": 986473,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940291717.308, "dur": 0.781, + "args": { + "External id": 986474,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940291719.299, "dur": 24.232, + "args": { + "External id": 986475,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940291720.993, "dur": 1.480, + "args": { + "External id": 986476,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940291723.941, "dur": 19.290, + "args": { + "External id": 986477,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940291727.142, "dur": 3.092, + "args": { + "External id": 986478,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345940291745.089, "dur": 26.060, + "args": { + "External id": 986479,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345940291772.911, "dur": 16.316, + "args": { + "External id": 986480,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345940291792.555, "dur": 14.571, + "args": { + "External id": 986481,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345940291808.811, "dur": 13.770, + "args": { + "External id": 986482,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345940291827.646, "dur": 22.014, + "args": { + "External id": 986483,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940291830.060, "dur": 1.931, + "args": { + "External id": 986484,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940291834.566, "dur": 0.598, + "args": { + "External id": 986485,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345940291851.815, "dur": 12.511, + "args": { + "External id": 986486,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940291865.567, "dur": 11.004, + "args": { + "External id": 986487,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940291885.697, "dur": 1.759, + "args": { + "External id": 986488,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940291899.602, "dur": 4.483, + "args": { + "External id": 986489,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940291902.567, "dur": 0.619, + "args": { + "External id": 986490,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940291979.521, "dur": 120.923, + "args": { + "External id": 986491,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940292109.197, "dur": 7.666, + "args": { + "External id": 986492,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940292113.669, "dur": 1.274, + "args": { + "External id": 986493,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940292118.548, "dur": 35.264, + "args": { + "External id": 986494,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940292160.440, "dur": 8.870, + "args": { + "External id": 986495,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940292162.321, "dur": 5.974, + "args": { + "External id": 986496,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940292166.665, "dur": 1.338, + "args": { + "External id": 986497,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940292172.590, "dur": 53.583, + "args": { + "External id": 986498,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940292173.846, "dur": 51.586, + "args": { + "External id": 986499,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940292230.767, "dur": 18.052, + "args": { + "External id": 986500,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940292256.385, "dur": 4.185, + "args": { + "External id": 986501,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940292259.020, "dur": 0.518, + "args": { + "External id": 986502,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940292267.788, "dur": 50.641, + "args": { + "External id": 986503,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940292268.899, "dur": 4.889, + "args": { + "External id": 986504,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940292269.979, "dur": 3.095, + "args": { + "External id": 986505,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940292271.867, "dur": 1.039, + "args": { + "External id": 986506,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940292274.624, "dur": 43.133, + "args": { + "External id": 986507,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940292275.101, "dur": 42.163, + "args": { + "External id": 986508,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940292323.344, "dur": 4.535, + "args": { + "External id": 986509,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940292325.618, "dur": 0.811, + "args": { + "External id": 986510,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940292337.628, "dur": 1.905, + "args": { + "External id": 986511,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940292349.111, "dur": 9.435, + "args": { + "External id": 986512,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940292351.943, "dur": 6.217, + "args": { + "External id": 986513,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940292463.162, "dur": 202.861, + "args": { + "External id": 986514,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940292465.784, "dur": 4.250, + "args": { + "External id": 986515,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940292474.054, "dur": 191.361, + "args": { + "External id": 986516,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345940292475.913, "dur": 0.360, + "args": { + "External id": 986517,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345940292477.965, "dur": 24.847, + "args": { + "External id": 986518,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345940292504.820, "dur": 3.502, + "args": { + "External id": 986519,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940292507.229, "dur": 0.787, + "args": { + "External id": 986520,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940292509.394, "dur": 25.475, + "args": { + "External id": 986521,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940292510.349, "dur": 1.732, + "args": { + "External id": 986522,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940292513.465, "dur": 21.085, + "args": { + "External id": 986523,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940292518.580, "dur": 2.897, + "args": { + "External id": 986524,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345940292536.588, "dur": 21.359, + "args": { + "External id": 986525,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345940292563.040, "dur": 14.797, + "args": { + "External id": 986526,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345940292582.981, "dur": 13.688, + "args": { + "External id": 986527,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345940292598.352, "dur": 13.492, + "args": { + "External id": 986528,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345940292614.433, "dur": 20.876, + "args": { + "External id": 986529,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940292616.615, "dur": 1.789, + "args": { + "External id": 986530,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940292621.048, "dur": 0.532, + "args": { + "External id": 986531,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345940292636.877, "dur": 12.311, + "args": { + "External id": 986532,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940292653.322, "dur": 10.898, + "args": { + "External id": 986533,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940292672.920, "dur": 1.707, + "args": { + "External id": 986534,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940292684.724, "dur": 4.385, + "args": { + "External id": 986535,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940292687.617, "dur": 0.680, + "args": { + "External id": 986536,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940292762.200, "dur": 56.281, + "args": { + "External id": 986537,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940292824.301, "dur": 6.886, + "args": { + "External id": 986538,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940292827.461, "dur": 2.319, + "args": { + "External id": 986539,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940292832.821, "dur": 25.212, + "args": { + "External id": 986540,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940292863.129, "dur": 8.345, + "args": { + "External id": 986541,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940292867.321, "dur": 3.353, + "args": { + "External id": 986542,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940292869.504, "dur": 0.992, + "args": { + "External id": 986543,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940292874.209, "dur": 43.887, + "args": { + "External id": 986544,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940292875.493, "dur": 41.772, + "args": { + "External id": 986545,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940292922.485, "dur": 14.402, + "args": { + "External id": 986546,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345940292942.200, "dur": 25.548, + "args": { + "External id": 986547,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345940292944.966, "dur": 22.353, + "args": { + "External id": 986548,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940292950.523, "dur": 0.937, + "args": { + "External id": 986549,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 19449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345940292976.785, "dur": 49.170, + "args": { + "External id": 986550,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 19450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345940292979.429, "dur": 45.921, + "args": { + "External id": 986551,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], [], []], "Ev Idx": 19451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940292984.949, "dur": 4.067, + "args": { + "External id": 986552,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940292990.559, "dur": 16.416, + "args": { + "External id": 986553,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2338710, + "ts": 6345940293044.754, "dur": 6.211, + "args": { + "External id": 986554,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 19454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2338710, + "ts": 6345940293047.274, "dur": 3.380, + "args": { + "External id": 986555,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 19455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2338710, + "ts": 6345940293052.585, "dur": 43.167, + "args": { + "External id": 986556,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2338710, + "ts": 6345940293092.349, "dur": 3.021, + "args": { + "External id": 986557,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940293147.460, "dur": 31.066, + "args": { + "External id": 986558,"Sequence number": 10552672, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 19458 + } + }, + { + "ph": "s", "id": 247, "pid": 2338710, "tid": 2338710, "ts": 6345940293147.460, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940293187.100, "dur": 8.960, + "args": { + "External id": 986559,"Sequence number": 10552673, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4, 4096], [], [], [], []], "Ev Idx": 19459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940293192.160, "dur": 1.724, + "args": { + "External id": 986560,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 19460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338710, "tid": 2338710, + "ts": 6345940293199.265, "dur": 7.651, + "args": { + "External id": 986561,"Sequence number": 10552673, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "1"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4, 4096], [], []], "Ev Idx": 19461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940293204.597, "dur": 0.785, + "args": { + "External id": 986562,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "2"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 19462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940293211.262, "dur": 2.982, + "args": { + "External id": 986563,"Sequence number": 10552673, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 19463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940293212.986, "dur": 0.558, + "args": { + "External id": 986564,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "2"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 19464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940293219.471, "dur": 7.180, + "args": { + "External id": 986565,"Sequence number": 10552673, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 19465 + } + }, + { + "ph": "s", "id": 246, "pid": 2338710, "tid": 2338710, "ts": 6345940293219.471, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940293224.115, "dur": 1.020, + "args": { + "External id": 986566,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 19466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940293227.902, "dur": 8.493, + "args": { + "External id": 986567,"Sequence number": 10552674, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 19467 + } + }, + { + "ph": "s", "id": 245, "pid": 2338710, "tid": 2338710, "ts": 6345940293227.902, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940293234.915, "dur": 0.548, + "args": { + "External id": 986568,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 19468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338710, "tid": 2338710, + "ts": 6345940293237.826, "dur": 5.883, + "args": { + "External id": 986569,"Sequence number": 10552675, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 19469 + } + }, + { + "ph": "s", "id": 244, "pid": 2338710, "tid": 2338710, "ts": 6345940293237.826, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940293242.291, "dur": 0.451, + "args": { + "External id": 986570,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "4096"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 19470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940293245.157, "dur": 7.298, + "args": { + "External id": 986571,"Sequence number": 10552676, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 19471 + } + }, + { + "ph": "s", "id": 243, "pid": 2338710, "tid": 2338710, "ts": 6345940293245.157, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940293248.693, "dur": 2.827, + "args": { + "External id": 986572,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "4096"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 19472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338710, "tid": 2338710, + "ts": 6345940293256.867, "dur": 39.969, + "args": { + "External id": 986573,"Sequence number": 10552677, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 19473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338710, "tid": 2338710, + "ts": 6345940293258.902, "dur": 37.700, + "args": { + "External id": 986574,"Sequence number": 10552677, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 19474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940293262.230, "dur": 11.720, + "args": { + "External id": 986575,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 19475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940293267.789, "dur": 5.478, + "args": { + "External id": 986576,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940293275.474, "dur": 20.548, + "args": { + "External id": 986577,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 19477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940293327.744, "dur": 5.123, + "args": { + "External id": 986578,"Sequence number": 10552677, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 19478 + } + }, + { + "ph": "s", "id": 242, "pid": 2338710, "tid": 2338710, "ts": 6345940293327.744, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940293336.029, "dur": 2.271, + "args": { + "External id": 986579,"Sequence number": 10552678, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 19479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345940293374.662, "dur": 46900.410, + "args": { + "External id": 986580,"Sequence number": 10552678, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16384, 1], [1], [4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768], [32000, 4096], [], [], [], [], []], "Ev Idx": 19480 + } + }, + { + "ph": "s", "id": 241, "pid": 2338710, "tid": 2338710, "ts": 6345940293374.662, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338710, "tid": 2338710, + "ts": 6345940293392.465, "dur": 31.535, + "args": { + "External id": 986581,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 19481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338710, "tid": 2338710, + "ts": 6345940293396.133, "dur": 27.590, + "args": { + "External id": 986582,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 19482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940293397.993, "dur": 5.417, + "args": { + "External id": 986583,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[16384, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 19483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940293399.609, "dur": 3.372, + "args": { + "External id": 986584,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940293404.144, "dur": 19.032, + "args": { + "External id": 986585,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [16384, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 19485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940293443.995, "dur": 29.885, + "args": { + "External id": 986586,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 19486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940293445.274, "dur": 7.090, + "args": { + "External id": 986587,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 19487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940293448.100, "dur": 3.883, + "args": { + "External id": 986588,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2338710, + "ts": 6345940293454.429, "dur": 19.187, + "args": { + "External id": 986589,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 19489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345940293456.462, "dur": 16.705, + "args": { + "External id": 986590,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 19490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940293478.001, "dur": 26.207, + "args": { + "External id": 986591,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 19491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940293479.380, "dur": 6.091, + "args": { + "External id": 986592,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 19492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940293481.532, "dur": 3.605, + "args": { + "External id": 986593,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2338710, + "ts": 6345940293488.536, "dur": 15.433, + "args": { + "External id": 986594,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345940293489.815, "dur": 13.776, + "args": { + "External id": 986595,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 19495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338710, "tid": 2338710, + "ts": 6345940293511.560, "dur": 21.650, + "args": { + "External id": 986596,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 19496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940293513.195, "dur": 4.970, + "args": { + "External id": 986597,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2338710, + "ts": 6345940293518.866, "dur": 14.015, + "args": { + "External id": 986598,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[32768]], "Ev Idx": 19498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345940293520.078, "dur": 12.351, + "args": { + "External id": 986599,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338710, "tid": 2338710, + "ts": 6345940293539.016, "dur": 29.028, + "args": { + "External id": 986600,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345940293571.313, "dur": 63.599, + "args": { + "External id": 986601,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345940293576.440, "dur": 57.890, + "args": { + "External id": 986602,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940293581.622, "dur": 1.128, + "args": { + "External id": 986603,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 19503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345940293584.443, "dur": 28.000, + "args": { + "External id": 986604,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345940293586.367, "dur": 25.815, + "args": { + "External id": 986605,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[32768], [], [], [], [], [], []], "Ev Idx": 19505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940293591.084, "dur": 3.985, + "args": { + "External id": 986606,"Record function id": 0, "Concrete Inputs": ["[32768]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940293596.147, "dur": 15.497, + "args": { + "External id": 986607,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[32768], [32768], []], "Ev Idx": 19507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338710, "tid": 2338710, + "ts": 6345940293639.432, "dur": 39848.598, + "args": { + "External id": 986608,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 19508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338710, "tid": 2338710, + "ts": 6345940293641.277, "dur": 39844.880, + "args": { + "External id": 986609,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 19509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940333507.204, "dur": 11.757, + "args": { + "External id": 986610,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940333514.263, "dur": 1.499, + "args": { + "External id": 986611,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940333530.179, "dur": 147.049, + "args": { + "External id": 986612,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940333532.886, "dur": 9.785, + "args": { + "External id": 986613,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940333536.032, "dur": 5.585, + "args": { + "External id": 986614,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940333538.843, "dur": 2.432, + "args": { + "External id": 986615,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940333544.353, "dur": 131.846, + "args": { + "External id": 986616,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940333547.076, "dur": 128.269, + "args": { + "External id": 986617,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940333681.776, "dur": 5.106, + "args": { + "External id": 986618,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940333684.426, "dur": 0.833, + "args": { + "External id": 986619,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940333699.430, "dur": 4.408, + "args": { + "External id": 986620,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940333716.650, "dur": 9.310, + "args": { + "External id": 986621,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940333720.446, "dur": 5.171, + "args": { + "External id": 986622,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940333895.570, "dur": 346.249, + "args": { + "External id": 986623,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940333900.392, "dur": 2.590, + "args": { + "External id": 986624,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940333931.034, "dur": 310.045, + "args": { + "External id": 986625,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345940333935.124, "dur": 0.493, + "args": { + "External id": 986626,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345940333937.451, "dur": 38.369, + "args": { + "External id": 986627,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345940333978.049, "dur": 3.547, + "args": { + "External id": 986628,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940333980.502, "dur": 0.752, + "args": { + "External id": 986629,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940333982.755, "dur": 48.925, + "args": { + "External id": 986630,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940333985.619, "dur": 1.281, + "args": { + "External id": 986631,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940333988.540, "dur": 42.273, + "args": { + "External id": 986632,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940333992.949, "dur": 3.531, + "args": { + "External id": 986633,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345940334036.903, "dur": 67.783, + "args": { + "External id": 986634,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345940334108.500, "dur": 18.953, + "args": { + "External id": 986635,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345940334131.807, "dur": 20.807, + "args": { + "External id": 986636,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345940334154.250, "dur": 17.086, + "args": { + "External id": 986637,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345940334174.235, "dur": 28.391, + "args": { + "External id": 986638,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940334177.192, "dur": 2.061, + "args": { + "External id": 986639,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940334181.832, "dur": 0.709, + "args": { + "External id": 986640,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345940334207.750, "dur": 16.113, + "args": { + "External id": 986641,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940334225.356, "dur": 14.330, + "args": { + "External id": 986642,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940334252.922, "dur": 3.184, + "args": { + "External id": 986643,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940334265.530, "dur": 6.194, + "args": { + "External id": 986644,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940334270.114, "dur": 0.509, + "args": { + "External id": 986645,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940334376.357, "dur": 96.719, + "args": { + "External id": 986646,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940334480.086, "dur": 9.951, + "args": { + "External id": 986647,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940334483.570, "dur": 3.015, + "args": { + "External id": 986648,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940334494.691, "dur": 33.459, + "args": { + "External id": 986649,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940334535.272, "dur": 9.056, + "args": { + "External id": 986650,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940334537.747, "dur": 5.568, + "args": { + "External id": 986651,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940334541.670, "dur": 1.338, + "args": { + "External id": 986652,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940334548.159, "dur": 60.816, + "args": { + "External id": 986653,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940334549.638, "dur": 58.420, + "args": { + "External id": 986654,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940334614.353, "dur": 18.542, + "args": { + "External id": 986655,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940334643.516, "dur": 5.871, + "args": { + "External id": 986656,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940334647.425, "dur": 0.743, + "args": { + "External id": 986657,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940334654.761, "dur": 57.985, + "args": { + "External id": 986658,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940334656.089, "dur": 4.607, + "args": { + "External id": 986659,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940334657.337, "dur": 2.589, + "args": { + "External id": 986660,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940334658.969, "dur": 0.737, + "args": { + "External id": 986661,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940334664.244, "dur": 47.998, + "args": { + "External id": 986662,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940334664.836, "dur": 46.605, + "args": { + "External id": 986663,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940334718.486, "dur": 3.964, + "args": { + "External id": 986664,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940334720.556, "dur": 0.452, + "args": { + "External id": 986665,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940334729.769, "dur": 2.053, + "args": { + "External id": 986666,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940334742.402, "dur": 15.590, + "args": { + "External id": 986667,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940334751.085, "dur": 6.510, + "args": { + "External id": 986668,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940334875.912, "dur": 278.583, + "args": { + "External id": 986669,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940334878.594, "dur": 3.497, + "args": { + "External id": 986670,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940334884.182, "dur": 269.639, + "args": { + "External id": 986671,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345940334886.083, "dur": 0.658, + "args": { + "External id": 986672,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345940334888.182, "dur": 24.774, + "args": { + "External id": 986673,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345940334915.203, "dur": 3.356, + "args": { + "External id": 986674,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940334917.513, "dur": 0.792, + "args": { + "External id": 986675,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940334919.872, "dur": 30.351, + "args": { + "External id": 986676,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940334923.721, "dur": 3.675, + "args": { + "External id": 986677,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940334928.769, "dur": 21.094, + "args": { + "External id": 986678,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940334933.607, "dur": 2.644, + "args": { + "External id": 986679,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345940334952.461, "dur": 22.979, + "args": { + "External id": 986680,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345940334977.695, "dur": 14.524, + "args": { + "External id": 986681,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345940334995.162, "dur": 33.231, + "args": { + "External id": 986682,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345940335032.354, "dur": 17.666, + "args": { + "External id": 986683,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345940335090.003, "dur": 33.559, + "args": { + "External id": 986684,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940335094.820, "dur": 3.676, + "args": { + "External id": 986685,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940335103.386, "dur": 0.828, + "args": { + "External id": 986686,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345940335125.733, "dur": 13.933, + "args": { + "External id": 986687,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940335140.903, "dur": 11.450, + "args": { + "External id": 986688,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940335165.232, "dur": 2.824, + "args": { + "External id": 986689,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940335179.853, "dur": 5.623, + "args": { + "External id": 986690,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940335182.735, "dur": 0.862, + "args": { + "External id": 986691,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940335278.337, "dur": 75.950, + "args": { + "External id": 986692,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940335360.599, "dur": 8.558, + "args": { + "External id": 986693,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940335366.671, "dur": 0.975, + "args": { + "External id": 986694,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940335371.110, "dur": 31.362, + "args": { + "External id": 986695,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940335408.038, "dur": 6.347, + "args": { + "External id": 986696,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940335409.869, "dur": 3.595, + "args": { + "External id": 986697,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940335412.292, "dur": 0.959, + "args": { + "External id": 986698,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940335418.251, "dur": 50.893, + "args": { + "External id": 986699,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940335419.317, "dur": 49.088, + "args": { + "External id": 986700,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940335476.311, "dur": 20.143, + "args": { + "External id": 986701,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940335503.745, "dur": 4.939, + "args": { + "External id": 986702,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940335506.835, "dur": 0.711, + "args": { + "External id": 986703,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940335513.764, "dur": 58.155, + "args": { + "External id": 986704,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940335515.066, "dur": 8.878, + "args": { + "External id": 986705,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940335515.952, "dur": 7.241, + "args": { + "External id": 986706,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940335520.221, "dur": 2.731, + "args": { + "External id": 986707,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940335524.855, "dur": 46.599, + "args": { + "External id": 986708,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940335525.766, "dur": 45.030, + "args": { + "External id": 986709,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940335577.065, "dur": 4.481, + "args": { + "External id": 986710,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940335579.581, "dur": 0.450, + "args": { + "External id": 986711,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940335588.185, "dur": 1.433, + "args": { + "External id": 986712,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940335599.540, "dur": 12.183, + "args": { + "External id": 986713,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940335604.675, "dur": 6.606, + "args": { + "External id": 986714,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940335727.302, "dur": 201.279, + "args": { + "External id": 986715,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940335730.216, "dur": 2.150, + "args": { + "External id": 986716,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940335734.119, "dur": 193.941, + "args": { + "External id": 986717,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345940335735.863, "dur": 0.529, + "args": { + "External id": 986718,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345940335739.089, "dur": 24.536, + "args": { + "External id": 986719,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345940335765.677, "dur": 3.558, + "args": { + "External id": 986720,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940335768.164, "dur": 0.771, + "args": { + "External id": 986721,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940335773.069, "dur": 26.247, + "args": { + "External id": 986722,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940335774.572, "dur": 2.959, + "args": { + "External id": 986723,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940335778.977, "dur": 20.005, + "args": { + "External id": 986724,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940335782.069, "dur": 2.902, + "args": { + "External id": 986725,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345940335801.492, "dur": 22.993, + "args": { + "External id": 986726,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345940335826.252, "dur": 14.096, + "args": { + "External id": 986727,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345940335843.575, "dur": 15.598, + "args": { + "External id": 986728,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345940335860.677, "dur": 13.061, + "args": { + "External id": 986729,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345940335875.813, "dur": 24.163, + "args": { + "External id": 986730,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940335880.525, "dur": 1.851, + "args": { + "External id": 986731,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940335884.639, "dur": 0.744, + "args": { + "External id": 986732,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345940335901.684, "dur": 12.515, + "args": { + "External id": 986733,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940335915.326, "dur": 11.426, + "args": { + "External id": 986734,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940335936.919, "dur": 2.005, + "args": { + "External id": 986735,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940335949.386, "dur": 4.200, + "args": { + "External id": 986736,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940335951.909, "dur": 0.541, + "args": { + "External id": 986737,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940336094.358, "dur": 79.820, + "args": { + "External id": 986738,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940336181.193, "dur": 7.400, + "args": { + "External id": 986739,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940336185.081, "dur": 1.304, + "args": { + "External id": 986740,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940336190.192, "dur": 33.310, + "args": { + "External id": 986741,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940336229.540, "dur": 11.269, + "args": { + "External id": 986742,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940336231.577, "dur": 8.327, + "args": { + "External id": 986743,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940336236.397, "dur": 3.164, + "args": { + "External id": 986744,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940336244.541, "dur": 49.838, + "args": { + "External id": 986745,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940336246.047, "dur": 47.324, + "args": { + "External id": 986746,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940336299.464, "dur": 17.425, + "args": { + "External id": 986747,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940336324.647, "dur": 4.654, + "args": { + "External id": 986748,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940336327.509, "dur": 0.536, + "args": { + "External id": 986749,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940336334.267, "dur": 52.626, + "args": { + "External id": 986750,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940336337.578, "dur": 4.024, + "args": { + "External id": 986751,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940336338.300, "dur": 2.503, + "args": { + "External id": 986752,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940336340.247, "dur": 0.395, + "args": { + "External id": 986753,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940336342.412, "dur": 44.019, + "args": { + "External id": 986754,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940336342.995, "dur": 42.722, + "args": { + "External id": 986755,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940336391.950, "dur": 4.673, + "args": { + "External id": 986756,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940336394.521, "dur": 0.700, + "args": { + "External id": 986757,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940336407.102, "dur": 2.019, + "args": { + "External id": 986758,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940336420.400, "dur": 10.440, + "args": { + "External id": 986759,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940336422.829, "dur": 7.600, + "args": { + "External id": 986760,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940336540.721, "dur": 204.298, + "args": { + "External id": 986761,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940336542.967, "dur": 2.194, + "args": { + "External id": 986762,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940336549.252, "dur": 195.203, + "args": { + "External id": 986763,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345940336553.213, "dur": 0.521, + "args": { + "External id": 986764,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345940336555.029, "dur": 23.945, + "args": { + "External id": 986765,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345940336581.028, "dur": 5.166, + "args": { + "External id": 986766,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940336583.454, "dur": 2.396, + "args": { + "External id": 986767,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940336587.440, "dur": 22.170, + "args": { + "External id": 986768,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940336589.052, "dur": 1.129, + "args": { + "External id": 986769,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940336591.592, "dur": 17.681, + "args": { + "External id": 986770,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940336594.541, "dur": 2.535, + "args": { + "External id": 986771,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345940336611.128, "dur": 21.450, + "args": { + "External id": 986772,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345940336634.389, "dur": 14.772, + "args": { + "External id": 986773,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345940336654.494, "dur": 14.953, + "args": { + "External id": 986774,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345940336670.983, "dur": 13.278, + "args": { + "External id": 986775,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345940336686.477, "dur": 27.423, + "args": { + "External id": 986776,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940336689.066, "dur": 2.335, + "args": { + "External id": 986777,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940336696.749, "dur": 2.491, + "args": { + "External id": 986778,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345940336715.787, "dur": 12.814, + "args": { + "External id": 986779,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940336732.406, "dur": 10.709, + "args": { + "External id": 986780,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940336752.553, "dur": 1.999, + "args": { + "External id": 986781,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940336764.616, "dur": 3.687, + "args": { + "External id": 986782,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940336766.867, "dur": 0.465, + "args": { + "External id": 986783,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940336844.480, "dur": 59.344, + "args": { + "External id": 986784,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940336909.419, "dur": 5.991, + "args": { + "External id": 986785,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940336913.155, "dur": 0.810, + "args": { + "External id": 986786,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940336916.855, "dur": 25.392, + "args": { + "External id": 986787,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940336947.211, "dur": 8.153, + "args": { + "External id": 986788,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940336951.728, "dur": 2.773, + "args": { + "External id": 986789,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940336953.427, "dur": 0.874, + "args": { + "External id": 986790,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940336958.397, "dur": 45.107, + "args": { + "External id": 986791,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940336959.473, "dur": 43.418, + "args": { + "External id": 986792,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940337028.267, "dur": 20.067, + "args": { + "External id": 986793,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940337094.893, "dur": 10.220, + "args": { + "External id": 986794,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940337102.387, "dur": 0.921, + "args": { + "External id": 986795,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940337110.772, "dur": 59.004, + "args": { + "External id": 986796,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940337111.927, "dur": 4.942, + "args": { + "External id": 986797,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940337113.154, "dur": 3.023, + "args": { + "External id": 986798,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940337115.042, "dur": 0.963, + "args": { + "External id": 986799,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940337117.670, "dur": 51.617, + "args": { + "External id": 986800,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940337118.405, "dur": 50.122, + "args": { + "External id": 986801,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940337177.382, "dur": 4.652, + "args": { + "External id": 986802,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940337180.003, "dur": 0.696, + "args": { + "External id": 986803,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940337188.997, "dur": 1.818, + "args": { + "External id": 986804,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940337201.326, "dur": 10.625, + "args": { + "External id": 986805,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940337203.777, "dur": 7.840, + "args": { + "External id": 986806,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940337323.190, "dur": 203.438, + "args": { + "External id": 986807,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940337327.957, "dur": 2.593, + "args": { + "External id": 986808,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940337332.497, "dur": 193.578, + "args": { + "External id": 986809,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345940337333.713, "dur": 0.374, + "args": { + "External id": 986810,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345940337335.290, "dur": 25.346, + "args": { + "External id": 986811,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345940337362.743, "dur": 5.012, + "args": { + "External id": 986812,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940337366.104, "dur": 1.339, + "args": { + "External id": 986813,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940337368.899, "dur": 27.172, + "args": { + "External id": 986814,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940337370.482, "dur": 1.307, + "args": { + "External id": 986815,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940337373.090, "dur": 22.591, + "args": { + "External id": 986816,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940337379.064, "dur": 3.253, + "args": { + "External id": 986817,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345940337397.467, "dur": 22.628, + "args": { + "External id": 986818,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345940337421.940, "dur": 14.319, + "args": { + "External id": 986819,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345940337439.398, "dur": 14.067, + "args": { + "External id": 986820,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345940337455.019, "dur": 13.403, + "args": { + "External id": 986821,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345940337470.633, "dur": 24.122, + "args": { + "External id": 986822,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940337475.037, "dur": 1.972, + "args": { + "External id": 986823,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940337479.527, "dur": 0.693, + "args": { + "External id": 986824,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345940337499.347, "dur": 12.504, + "args": { + "External id": 986825,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940337513.301, "dur": 11.716, + "args": { + "External id": 986826,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940337534.896, "dur": 1.863, + "args": { + "External id": 986827,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940337546.828, "dur": 4.547, + "args": { + "External id": 986828,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940337550.127, "dur": 0.371, + "args": { + "External id": 986829,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940337624.619, "dur": 57.825, + "args": { + "External id": 986830,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940337688.660, "dur": 8.031, + "args": { + "External id": 986831,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940337693.984, "dur": 1.211, + "args": { + "External id": 986832,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940337698.413, "dur": 27.882, + "args": { + "External id": 986833,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940337730.962, "dur": 6.242, + "args": { + "External id": 986834,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940337732.511, "dur": 3.817, + "args": { + "External id": 986835,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940337734.645, "dur": 1.473, + "args": { + "External id": 986836,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940337740.602, "dur": 44.743, + "args": { + "External id": 986837,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940337741.762, "dur": 42.700, + "args": { + "External id": 986838,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940337792.452, "dur": 15.926, + "args": { + "External id": 986839,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940337815.147, "dur": 4.157, + "args": { + "External id": 986840,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940337817.808, "dur": 0.450, + "args": { + "External id": 986841,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940337823.606, "dur": 53.538, + "args": { + "External id": 986842,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940337824.460, "dur": 9.066, + "args": { + "External id": 986843,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940337825.292, "dur": 7.613, + "args": { + "External id": 986844,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940337829.638, "dur": 3.067, + "args": { + "External id": 986845,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940337834.510, "dur": 42.195, + "args": { + "External id": 986846,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940337835.311, "dur": 40.738, + "args": { + "External id": 986847,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940337882.621, "dur": 4.043, + "args": { + "External id": 986848,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940337884.838, "dur": 0.399, + "args": { + "External id": 986849,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940337892.459, "dur": 1.428, + "args": { + "External id": 986850,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940337902.542, "dur": 8.541, + "args": { + "External id": 986851,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940337907.061, "dur": 3.656, + "args": { + "External id": 986852,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940338001.721, "dur": 277.249, + "args": { + "External id": 986853,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940338005.332, "dur": 20.385, + "args": { + "External id": 986854,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940338028.545, "dur": 249.794, + "args": { + "External id": 986855,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345940338030.196, "dur": 0.342, + "args": { + "External id": 986856,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345940338032.182, "dur": 64.982, + "args": { + "External id": 986857,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345940338100.406, "dur": 5.299, + "args": { + "External id": 986858,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940338104.393, "dur": 0.998, + "args": { + "External id": 986859,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940338109.025, "dur": 25.196, + "args": { + "External id": 986860,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940338110.307, "dur": 2.063, + "args": { + "External id": 986861,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940338113.479, "dur": 20.463, + "args": { + "External id": 986862,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940338117.174, "dur": 3.124, + "args": { + "External id": 986863,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345940338135.751, "dur": 28.039, + "args": { + "External id": 986864,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345940338165.774, "dur": 15.659, + "args": { + "External id": 986865,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345940338184.793, "dur": 16.141, + "args": { + "External id": 986866,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345940338202.530, "dur": 14.812, + "args": { + "External id": 986867,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345940338219.426, "dur": 26.933, + "args": { + "External id": 986868,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940338222.078, "dur": 1.946, + "args": { + "External id": 986869,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940338228.652, "dur": 0.621, + "args": { + "External id": 986870,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345940338248.124, "dur": 14.445, + "args": { + "External id": 986871,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940338263.819, "dur": 12.920, + "args": { + "External id": 986872,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940338289.060, "dur": 2.616, + "args": { + "External id": 986873,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940338302.479, "dur": 4.509, + "args": { + "External id": 986874,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940338305.448, "dur": 0.605, + "args": { + "External id": 986875,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940338389.711, "dur": 70.656, + "args": { + "External id": 986876,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940338466.113, "dur": 8.214, + "args": { + "External id": 986877,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940338471.943, "dur": 0.939, + "args": { + "External id": 986878,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940338476.152, "dur": 26.252, + "args": { + "External id": 986879,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940338507.445, "dur": 6.777, + "args": { + "External id": 986880,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940338509.229, "dur": 4.163, + "args": { + "External id": 986881,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940338511.306, "dur": 1.840, + "args": { + "External id": 986882,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940338517.252, "dur": 51.594, + "args": { + "External id": 986883,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940338520.682, "dur": 47.317, + "args": { + "External id": 986884,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940338573.403, "dur": 17.690, + "args": { + "External id": 986885,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940338597.804, "dur": 4.301, + "args": { + "External id": 986886,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940338600.502, "dur": 0.483, + "args": { + "External id": 986887,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940338606.734, "dur": 52.605, + "args": { + "External id": 986888,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940338607.890, "dur": 6.924, + "args": { + "External id": 986889,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940338608.818, "dur": 5.314, + "args": { + "External id": 986890,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940338613.371, "dur": 0.603, + "args": { + "External id": 986891,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940338615.667, "dur": 43.229, + "args": { + "External id": 986892,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940338616.183, "dur": 42.073, + "args": { + "External id": 986893,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940338664.342, "dur": 6.100, + "args": { + "External id": 986894,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940338666.767, "dur": 2.087, + "args": { + "External id": 986895,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940338676.669, "dur": 1.574, + "args": { + "External id": 986896,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940338689.927, "dur": 8.891, + "args": { + "External id": 986897,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940338691.974, "dur": 6.458, + "args": { + "External id": 986898,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940338790.646, "dur": 186.539, + "args": { + "External id": 986899,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940338793.272, "dur": 1.923, + "args": { + "External id": 986900,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940338796.721, "dur": 179.937, + "args": { + "External id": 986901,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345940338797.914, "dur": 0.399, + "args": { + "External id": 986902,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345940338799.704, "dur": 22.994, + "args": { + "External id": 986903,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345940338826.809, "dur": 4.169, + "args": { + "External id": 986904,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940338829.960, "dur": 0.830, + "args": { + "External id": 986905,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940338831.814, "dur": 22.438, + "args": { + "External id": 986906,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940338833.069, "dur": 1.112, + "args": { + "External id": 986907,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940338835.220, "dur": 18.566, + "args": { + "External id": 986908,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940338839.232, "dur": 2.882, + "args": { + "External id": 986909,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345940338855.779, "dur": 19.452, + "args": { + "External id": 986910,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345940338877.013, "dur": 12.893, + "args": { + "External id": 986911,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345940338892.695, "dur": 14.026, + "args": { + "External id": 986912,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345940338910.222, "dur": 12.600, + "args": { + "External id": 986913,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345940338927.208, "dur": 19.794, + "args": { + "External id": 986914,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940338929.530, "dur": 1.393, + "args": { + "External id": 986915,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940338933.103, "dur": 0.609, + "args": { + "External id": 986916,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345940338948.295, "dur": 15.561, + "args": { + "External id": 986917,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940338964.978, "dur": 10.541, + "args": { + "External id": 986918,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940338984.435, "dur": 1.820, + "args": { + "External id": 986919,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940338997.462, "dur": 3.967, + "args": { + "External id": 986920,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940339000.156, "dur": 0.386, + "args": { + "External id": 986921,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940339141.663, "dur": 76.159, + "args": { + "External id": 986922,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940339224.146, "dur": 9.132, + "args": { + "External id": 986923,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940339228.268, "dur": 2.905, + "args": { + "External id": 986924,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940339234.970, "dur": 30.291, + "args": { + "External id": 986925,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940339270.885, "dur": 8.588, + "args": { + "External id": 986926,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940339272.655, "dur": 5.876, + "args": { + "External id": 986927,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940339277.379, "dur": 0.906, + "args": { + "External id": 986928,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940339282.546, "dur": 48.432, + "args": { + "External id": 986929,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940339283.633, "dur": 46.556, + "args": { + "External id": 986930,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940339335.704, "dur": 18.564, + "args": { + "External id": 986931,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940339361.296, "dur": 4.250, + "args": { + "External id": 986932,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940339364.048, "dur": 0.544, + "args": { + "External id": 986933,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940339372.570, "dur": 48.648, + "args": { + "External id": 986934,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940339373.725, "dur": 3.683, + "args": { + "External id": 986935,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940339374.496, "dur": 2.267, + "args": { + "External id": 986936,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940339376.063, "dur": 0.530, + "args": { + "External id": 986937,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940339378.038, "dur": 42.805, + "args": { + "External id": 986938,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940339378.505, "dur": 41.736, + "args": { + "External id": 986939,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940339426.583, "dur": 4.815, + "args": { + "External id": 986940,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940339428.979, "dur": 1.062, + "args": { + "External id": 986941,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940339441.380, "dur": 1.940, + "args": { + "External id": 986942,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940339452.803, "dur": 9.504, + "args": { + "External id": 986943,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940339455.163, "dur": 6.734, + "args": { + "External id": 986944,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940339569.520, "dur": 220.706, + "args": { + "External id": 986945,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940339572.339, "dur": 5.217, + "args": { + "External id": 986946,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940339581.669, "dur": 207.838, + "args": { + "External id": 986947,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345940339583.120, "dur": 0.429, + "args": { + "External id": 986948,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345940339585.145, "dur": 25.483, + "args": { + "External id": 986949,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345940339612.380, "dur": 5.646, + "args": { + "External id": 986950,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940339617.192, "dur": 0.593, + "args": { + "External id": 986951,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940339618.915, "dur": 24.933, + "args": { + "External id": 986952,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940339620.169, "dur": 1.266, + "args": { + "External id": 986953,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940339622.868, "dur": 20.553, + "args": { + "External id": 986954,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940339625.810, "dur": 2.353, + "args": { + "External id": 986955,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345940339645.624, "dur": 24.236, + "args": { + "External id": 986956,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345940339671.485, "dur": 18.421, + "args": { + "External id": 986957,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345940339694.917, "dur": 16.242, + "args": { + "External id": 986958,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345940339712.895, "dur": 15.168, + "args": { + "External id": 986959,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345940339730.754, "dur": 25.319, + "args": { + "External id": 986960,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940339735.438, "dur": 1.836, + "args": { + "External id": 986961,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940339739.618, "dur": 0.501, + "args": { + "External id": 986962,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345940339757.598, "dur": 14.455, + "args": { + "External id": 986963,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940339775.277, "dur": 12.858, + "args": { + "External id": 986964,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940339797.291, "dur": 1.854, + "args": { + "External id": 986965,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940339808.235, "dur": 4.163, + "args": { + "External id": 986966,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940339811.050, "dur": 0.415, + "args": { + "External id": 986967,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940339884.625, "dur": 58.557, + "args": { + "External id": 986968,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940339948.798, "dur": 5.063, + "args": { + "External id": 986969,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940339952.053, "dur": 0.545, + "args": { + "External id": 986970,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940339955.203, "dur": 26.810, + "args": { + "External id": 986971,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940339986.681, "dur": 8.920, + "args": { + "External id": 986972,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940339991.151, "dur": 3.713, + "args": { + "External id": 986973,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940339993.144, "dur": 1.517, + "args": { + "External id": 986974,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940339998.120, "dur": 115.745, + "args": { + "External id": 986975,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940339999.235, "dur": 113.069, + "args": { + "External id": 986976,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940340120.946, "dur": 21.359, + "args": { + "External id": 986977,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345940340148.621, "dur": 30.381, + "args": { + "External id": 986978,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345940340151.348, "dur": 27.151, + "args": { + "External id": 986979,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940340158.250, "dur": 0.959, + "args": { + "External id": 986980,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 19880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345940340185.545, "dur": 37.834, + "args": { + "External id": 986981,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 19881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345940340190.295, "dur": 32.830, + "args": { + "External id": 986982,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], [], []], "Ev Idx": 19882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940340195.837, "dur": 4.568, + "args": { + "External id": 986983,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940340201.813, "dur": 20.730, + "args": { + "External id": 986984,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2338710, + "ts": 6345940340238.388, "dur": 5.867, + "args": { + "External id": 986985,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 19885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2338710, + "ts": 6345940340240.526, "dur": 3.349, + "args": { + "External id": 986986,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 19886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2338710, + "ts": 6345940340245.501, "dur": 3.535, + "args": { + "External id": 986987,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2338710, + "ts": 6345940340248.279, "dur": 0.601, + "args": { + "External id": 986988,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940340294.788, "dur": 26.931, + "args": { + "External id": 986989,"Sequence number": 10552679, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 19889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940340323.867, "dur": 17.818, + "args": { + "External id": 986990,"Sequence number": 10552680, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 19890 + } + }, + { + "ph": "s", "id": 240, "pid": 2338710, "tid": 2338710, "ts": 6345940340323.867, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940340348.984, "dur": 7.941, + "args": { + "External id": 986991,"Sequence number": 10552681, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4, 4096], [], [], [], []], "Ev Idx": 19891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940340353.786, "dur": 1.259, + "args": { + "External id": 986992,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 19892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338710, "tid": 2338710, + "ts": 6345940340359.816, "dur": 7.365, + "args": { + "External id": 986993,"Sequence number": 10552681, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "2"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4, 4096], [], []], "Ev Idx": 19893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940340365.113, "dur": 0.510, + "args": { + "External id": 986994,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "3"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 19894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940340371.463, "dur": 3.038, + "args": { + "External id": 986995,"Sequence number": 10552681, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 19895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940340373.330, "dur": 0.397, + "args": { + "External id": 986996,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "3"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 19896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940340379.254, "dur": 6.485, + "args": { + "External id": 986997,"Sequence number": 10552681, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 19897 + } + }, + { + "ph": "s", "id": 239, "pid": 2338710, "tid": 2338710, "ts": 6345940340379.254, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940340383.418, "dur": 0.816, + "args": { + "External id": 986998,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 19898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940340387.065, "dur": 8.149, + "args": { + "External id": 986999,"Sequence number": 10552682, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 19899 + } + }, + { + "ph": "s", "id": 238, "pid": 2338710, "tid": 2338710, "ts": 6345940340387.065, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940340393.622, "dur": 0.497, + "args": { + "External id": 987000,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 19900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338710, "tid": 2338710, + "ts": 6345940340396.486, "dur": 6.160, + "args": { + "External id": 987001,"Sequence number": 10552683, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "2"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 19901 + } + }, + { + "ph": "s", "id": 237, "pid": 2338710, "tid": 2338710, "ts": 6345940340396.486, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940340400.741, "dur": 0.825, + "args": { + "External id": 987002,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "8192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 19902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940340403.746, "dur": 8.221, + "args": { + "External id": 987003,"Sequence number": 10552684, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 19903 + } + }, + { + "ph": "s", "id": 236, "pid": 2338710, "tid": 2338710, "ts": 6345940340403.746, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940340407.491, "dur": 3.482, + "args": { + "External id": 987004,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "8192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 19904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338710, "tid": 2338710, + "ts": 6345940340416.759, "dur": 38.209, + "args": { + "External id": 987005,"Sequence number": 10552685, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 19905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338710, "tid": 2338710, + "ts": 6345940340418.572, "dur": 36.124, + "args": { + "External id": 987006,"Sequence number": 10552685, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 19906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940340421.676, "dur": 11.575, + "args": { + "External id": 987007,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 19907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940340427.139, "dur": 5.398, + "args": { + "External id": 987008,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940340434.406, "dur": 19.693, + "args": { + "External id": 987009,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 19909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940340485.560, "dur": 5.099, + "args": { + "External id": 987010,"Sequence number": 10552685, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 19910 + } + }, + { + "ph": "s", "id": 235, "pid": 2338710, "tid": 2338710, "ts": 6345940340485.560, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940340493.526, "dur": 1.233, + "args": { + "External id": 987011,"Sequence number": 10552686, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 19911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345940340533.321, "dur": 46508.926, + "args": { + "External id": 987012,"Sequence number": 10552686, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16384, 1], [1], [4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768], [32000, 4096], [], [], [], [], []], "Ev Idx": 19912 + } + }, + { + "ph": "s", "id": 234, "pid": 2338710, "tid": 2338710, "ts": 6345940340533.321, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338710, "tid": 2338710, + "ts": 6345940340552.135, "dur": 39.276, + "args": { + "External id": 987013,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 19913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338710, "tid": 2338710, + "ts": 6345940340555.614, "dur": 35.568, + "args": { + "External id": 987014,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 19914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940340557.305, "dur": 5.409, + "args": { + "External id": 987015,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[16384, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 19915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940340558.816, "dur": 3.432, + "args": { + "External id": 987016,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940340568.492, "dur": 22.266, + "args": { + "External id": 987017,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [16384, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 19917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940340612.045, "dur": 30.442, + "args": { + "External id": 987018,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 19918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940340613.633, "dur": 6.650, + "args": { + "External id": 987019,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 19919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940340616.037, "dur": 3.916, + "args": { + "External id": 987020,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2338710, + "ts": 6345940340621.869, "dur": 20.354, + "args": { + "External id": 987021,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 19921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345940340624.448, "dur": 17.254, + "args": { + "External id": 987022,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 19922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940340646.493, "dur": 28.800, + "args": { + "External id": 987023,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 19923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940340647.476, "dur": 7.731, + "args": { + "External id": 987024,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 19924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940340651.578, "dur": 3.252, + "args": { + "External id": 987025,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2338710, + "ts": 6345940340655.933, "dur": 19.109, + "args": { + "External id": 987026,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345940340656.628, "dur": 17.945, + "args": { + "External id": 987027,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 19927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338710, "tid": 2338710, + "ts": 6345940340682.569, "dur": 21.448, + "args": { + "External id": 987028,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 19928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940340684.409, "dur": 3.374, + "args": { + "External id": 987029,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2338710, + "ts": 6345940340688.488, "dur": 15.195, + "args": { + "External id": 987030,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[32768]], "Ev Idx": 19930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345940340689.533, "dur": 13.572, + "args": { + "External id": 987031,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338710, "tid": 2338710, + "ts": 6345940340709.395, "dur": 29.221, + "args": { + "External id": 987032,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345940340744.924, "dur": 56.283, + "args": { + "External id": 987033,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345940340746.993, "dur": 53.681, + "args": { + "External id": 987034,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940340751.787, "dur": 1.083, + "args": { + "External id": 987035,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 19935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345940340754.159, "dur": 28.095, + "args": { + "External id": 987036,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345940340757.437, "dur": 24.537, + "args": { + "External id": 987037,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[32768], [], [], [], [], [], []], "Ev Idx": 19937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940340760.313, "dur": 3.104, + "args": { + "External id": 987038,"Record function id": 0, "Concrete Inputs": ["[32768]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940340764.382, "dur": 17.133, + "args": { + "External id": 987039,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[32768], [32768], []], "Ev Idx": 19939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338710, "tid": 2338710, + "ts": 6345940340805.628, "dur": 39442.586, + "args": { + "External id": 987040,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 19940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338710, "tid": 2338710, + "ts": 6345940340807.210, "dur": 39439.622, + "args": { + "External id": 987041,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 19941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940380263.375, "dur": 13.893, + "args": { + "External id": 987042,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940380272.886, "dur": 1.085, + "args": { + "External id": 987043,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940380284.268, "dur": 132.888, + "args": { + "External id": 987044,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940380286.236, "dur": 9.825, + "args": { + "External id": 987045,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940380290.427, "dur": 4.557, + "args": { + "External id": 987046,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940380292.762, "dur": 1.910, + "args": { + "External id": 987047,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940380297.509, "dur": 118.669, + "args": { + "External id": 987048,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940380299.616, "dur": 115.750, + "args": { + "External id": 987049,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940380426.041, "dur": 6.618, + "args": { + "External id": 987050,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940380430.172, "dur": 0.820, + "args": { + "External id": 987051,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940380443.503, "dur": 3.635, + "args": { + "External id": 987052,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940380459.059, "dur": 7.801, + "args": { + "External id": 987053,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940380462.452, "dur": 4.036, + "args": { + "External id": 987054,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940380656.576, "dur": 264.147, + "args": { + "External id": 987055,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940380660.744, "dur": 5.436, + "args": { + "External id": 987056,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940380668.961, "dur": 251.002, + "args": { + "External id": 987057,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345940380672.061, "dur": 0.674, + "args": { + "External id": 987058,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345940380674.382, "dur": 36.090, + "args": { + "External id": 987059,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345940380713.416, "dur": 6.594, + "args": { + "External id": 987060,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940380717.627, "dur": 2.030, + "args": { + "External id": 987061,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940380721.275, "dur": 35.660, + "args": { + "External id": 987062,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940380722.977, "dur": 1.412, + "args": { + "External id": 987063,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940380726.970, "dur": 29.643, + "args": { + "External id": 987064,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940380733.682, "dur": 4.883, + "args": { + "External id": 987065,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345940380760.250, "dur": 28.625, + "args": { + "External id": 987066,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345940380792.212, "dur": 18.225, + "args": { + "External id": 987067,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345940380815.197, "dur": 18.201, + "args": { + "External id": 987068,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345940380836.132, "dur": 15.737, + "args": { + "External id": 987069,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345940380854.331, "dur": 28.159, + "args": { + "External id": 987070,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940380857.178, "dur": 1.849, + "args": { + "External id": 987071,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940380862.972, "dur": 2.406, + "args": { + "External id": 987072,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345940380887.589, "dur": 15.866, + "args": { + "External id": 987073,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940380905.194, "dur": 13.463, + "args": { + "External id": 987074,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940380931.400, "dur": 2.290, + "args": { + "External id": 987075,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940380943.212, "dur": 5.905, + "args": { + "External id": 987076,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940380947.259, "dur": 0.677, + "args": { + "External id": 987077,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940381103.852, "dur": 96.412, + "args": { + "External id": 987078,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940381209.425, "dur": 14.485, + "args": { + "External id": 987079,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940381217.833, "dur": 1.398, + "args": { + "External id": 987080,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940381225.713, "dur": 37.716, + "args": { + "External id": 987081,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940381270.465, "dur": 9.065, + "args": { + "External id": 987082,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940381272.704, "dur": 5.954, + "args": { + "External id": 987083,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940381276.637, "dur": 1.674, + "args": { + "External id": 987084,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940381283.630, "dur": 58.469, + "args": { + "External id": 987085,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940381284.976, "dur": 56.436, + "args": { + "External id": 987086,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940381350.091, "dur": 21.357, + "args": { + "External id": 987087,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940381384.747, "dur": 6.914, + "args": { + "External id": 987088,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940381389.548, "dur": 0.661, + "args": { + "External id": 987089,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940381397.150, "dur": 57.151, + "args": { + "External id": 987090,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940381398.436, "dur": 6.923, + "args": { + "External id": 987091,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940381399.518, "dur": 5.119, + "args": { + "External id": 987092,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940381403.545, "dur": 0.900, + "args": { + "External id": 987093,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940381406.091, "dur": 47.714, + "args": { + "External id": 987094,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940381406.790, "dur": 46.301, + "args": { + "External id": 987095,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940381459.225, "dur": 4.210, + "args": { + "External id": 987096,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940381461.235, "dur": 0.541, + "args": { + "External id": 987097,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940381471.166, "dur": 1.955, + "args": { + "External id": 987098,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940381485.771, "dur": 10.621, + "args": { + "External id": 987099,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940381489.283, "dur": 6.686, + "args": { + "External id": 987100,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940381617.834, "dur": 227.046, + "args": { + "External id": 987101,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940381620.424, "dur": 2.069, + "args": { + "External id": 987102,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940381624.326, "dur": 219.948, + "args": { + "External id": 987103,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345940381626.139, "dur": 0.367, + "args": { + "External id": 987104,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345940381630.551, "dur": 26.672, + "args": { + "External id": 987105,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345940381659.239, "dur": 6.593, + "args": { + "External id": 987106,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940381662.398, "dur": 3.095, + "args": { + "External id": 987107,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940381666.859, "dur": 27.701, + "args": { + "External id": 987108,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940381668.256, "dur": 1.664, + "args": { + "External id": 987109,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940381671.472, "dur": 22.733, + "args": { + "External id": 987110,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940381677.165, "dur": 3.087, + "args": { + "External id": 987111,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345940381696.731, "dur": 25.842, + "args": { + "External id": 987112,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345940381724.288, "dur": 16.349, + "args": { + "External id": 987113,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345940381743.599, "dur": 17.818, + "args": { + "External id": 987114,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345940381763.144, "dur": 16.051, + "args": { + "External id": 987115,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345940381784.470, "dur": 26.249, + "args": { + "External id": 987116,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940381787.225, "dur": 2.172, + "args": { + "External id": 987117,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940381792.039, "dur": 0.861, + "args": { + "External id": 987118,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345940381812.355, "dur": 15.124, + "args": { + "External id": 987119,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940381828.786, "dur": 14.252, + "args": { + "External id": 987120,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940381853.211, "dur": 2.102, + "args": { + "External id": 987121,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940381866.145, "dur": 5.480, + "args": { + "External id": 987122,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940381869.995, "dur": 0.609, + "args": { + "External id": 987123,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940381954.533, "dur": 89.666, + "args": { + "External id": 987124,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940382088.953, "dur": 9.510, + "args": { + "External id": 987125,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940382094.905, "dur": 1.524, + "args": { + "External id": 987126,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940382101.456, "dur": 36.853, + "args": { + "External id": 987127,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940382145.606, "dur": 10.881, + "args": { + "External id": 987128,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940382147.709, "dur": 7.737, + "args": { + "External id": 987129,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940382154.159, "dur": 1.015, + "args": { + "External id": 987130,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940382161.490, "dur": 63.530, + "args": { + "External id": 987131,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940382162.960, "dur": 61.337, + "args": { + "External id": 987132,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940382230.245, "dur": 20.415, + "args": { + "External id": 987133,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940382258.443, "dur": 5.584, + "args": { + "External id": 987134,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940382262.274, "dur": 0.594, + "args": { + "External id": 987135,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940382268.953, "dur": 86.076, + "args": { + "External id": 987136,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940382295.325, "dur": 6.250, + "args": { + "External id": 987137,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940382296.245, "dur": 4.598, + "args": { + "External id": 987138,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940382298.158, "dur": 2.428, + "args": { + "External id": 987139,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940382302.399, "dur": 52.123, + "args": { + "External id": 987140,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940382302.943, "dur": 51.050, + "args": { + "External id": 987141,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940382360.291, "dur": 6.785, + "args": { + "External id": 987142,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940382362.547, "dur": 2.790, + "args": { + "External id": 987143,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940382375.285, "dur": 1.819, + "args": { + "External id": 987144,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940382389.691, "dur": 6.529, + "args": { + "External id": 987145,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940382391.989, "dur": 3.907, + "args": { + "External id": 987146,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940382517.267, "dur": 208.886, + "args": { + "External id": 987147,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940382522.824, "dur": 2.131, + "args": { + "External id": 987148,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940382526.855, "dur": 198.798, + "args": { + "External id": 987149,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345940382528.336, "dur": 0.416, + "args": { + "External id": 987150,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345940382533.056, "dur": 26.988, + "args": { + "External id": 987151,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345940382562.201, "dur": 5.227, + "args": { + "External id": 987152,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940382566.391, "dur": 0.701, + "args": { + "External id": 987153,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940382568.548, "dur": 23.590, + "args": { + "External id": 987154,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940382569.538, "dur": 1.452, + "args": { + "External id": 987155,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940382572.279, "dur": 19.536, + "args": { + "External id": 987156,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940382575.520, "dur": 3.545, + "args": { + "External id": 987157,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345940382593.762, "dur": 25.360, + "args": { + "External id": 987158,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345940382620.740, "dur": 14.883, + "args": { + "External id": 987159,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345940382639.268, "dur": 14.007, + "args": { + "External id": 987160,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345940382657.396, "dur": 14.070, + "args": { + "External id": 987161,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345940382673.537, "dur": 23.713, + "args": { + "External id": 987162,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940382678.235, "dur": 1.325, + "args": { + "External id": 987163,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940382681.786, "dur": 0.558, + "args": { + "External id": 987164,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345940382698.960, "dur": 13.189, + "args": { + "External id": 987165,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940382713.349, "dur": 11.231, + "args": { + "External id": 987166,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940382736.510, "dur": 1.863, + "args": { + "External id": 987167,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940382749.026, "dur": 5.670, + "args": { + "External id": 987168,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940382752.217, "dur": 0.398, + "args": { + "External id": 987169,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940382833.750, "dur": 61.854, + "args": { + "External id": 987170,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940382901.260, "dur": 5.331, + "args": { + "External id": 987171,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940382904.569, "dur": 0.776, + "args": { + "External id": 987172,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940382907.958, "dur": 30.510, + "args": { + "External id": 987173,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940382943.891, "dur": 8.439, + "args": { + "External id": 987174,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940382947.845, "dur": 3.667, + "args": { + "External id": 987175,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940382950.160, "dur": 1.107, + "args": { + "External id": 987176,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940382954.958, "dur": 48.410, + "args": { + "External id": 987177,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940382956.118, "dur": 46.642, + "args": { + "External id": 987178,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940383027.356, "dur": 22.642, + "args": { + "External id": 987179,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940383098.294, "dur": 9.728, + "args": { + "External id": 987180,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940383105.617, "dur": 0.791, + "args": { + "External id": 987181,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940383113.846, "dur": 59.651, + "args": { + "External id": 987182,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940383114.904, "dur": 4.855, + "args": { + "External id": 987183,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940383116.257, "dur": 2.794, + "args": { + "External id": 987184,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940383117.858, "dur": 1.024, + "args": { + "External id": 987185,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940383120.811, "dur": 52.002, + "args": { + "External id": 987186,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940383121.793, "dur": 50.131, + "args": { + "External id": 987187,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940383180.654, "dur": 5.393, + "args": { + "External id": 987188,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940383183.585, "dur": 0.731, + "args": { + "External id": 987189,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940383193.510, "dur": 2.003, + "args": { + "External id": 987190,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940383205.037, "dur": 7.173, + "args": { + "External id": 987191,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940383207.308, "dur": 4.586, + "args": { + "External id": 987192,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940383321.157, "dur": 202.762, + "args": { + "External id": 987193,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940383325.104, "dur": 4.313, + "args": { + "External id": 987194,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940383331.379, "dur": 191.947, + "args": { + "External id": 987195,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345940383332.870, "dur": 0.557, + "args": { + "External id": 987196,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345940383334.750, "dur": 24.927, + "args": { + "External id": 987197,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345940383361.520, "dur": 5.151, + "args": { + "External id": 987198,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940383365.491, "dur": 0.894, + "args": { + "External id": 987199,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940383367.686, "dur": 26.091, + "args": { + "External id": 987200,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940383369.596, "dur": 1.314, + "args": { + "External id": 987201,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940383374.577, "dur": 18.864, + "args": { + "External id": 987202,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940383377.671, "dur": 2.909, + "args": { + "External id": 987203,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345940383395.491, "dur": 22.509, + "args": { + "External id": 987204,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345940383420.047, "dur": 15.398, + "args": { + "External id": 987205,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345940383438.351, "dur": 15.138, + "args": { + "External id": 987206,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345940383455.237, "dur": 13.264, + "args": { + "External id": 987207,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345940383470.951, "dur": 21.923, + "args": { + "External id": 987208,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940383473.204, "dur": 1.926, + "args": { + "External id": 987209,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940383477.565, "dur": 0.700, + "args": { + "External id": 987210,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345940383497.349, "dur": 12.057, + "args": { + "External id": 987211,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940383510.436, "dur": 11.611, + "args": { + "External id": 987212,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940383531.278, "dur": 1.831, + "args": { + "External id": 987213,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940383543.267, "dur": 3.886, + "args": { + "External id": 987214,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940383545.749, "dur": 0.377, + "args": { + "External id": 987215,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940383623.973, "dur": 60.954, + "args": { + "External id": 987216,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940383690.632, "dur": 5.882, + "args": { + "External id": 987217,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940383694.274, "dur": 0.823, + "args": { + "External id": 987218,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940383700.939, "dur": 29.777, + "args": { + "External id": 987219,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940383736.410, "dur": 6.570, + "args": { + "External id": 987220,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940383738.383, "dur": 3.846, + "args": { + "External id": 987221,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940383740.862, "dur": 1.110, + "args": { + "External id": 987222,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940383746.092, "dur": 45.952, + "args": { + "External id": 987223,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940383747.576, "dur": 43.917, + "args": { + "External id": 987224,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940383796.406, "dur": 15.748, + "args": { + "External id": 987225,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940383821.709, "dur": 4.002, + "args": { + "External id": 987226,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940383824.027, "dur": 0.672, + "args": { + "External id": 987227,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940383830.020, "dur": 54.825, + "args": { + "External id": 987228,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940383831.035, "dur": 4.552, + "args": { + "External id": 987229,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940383832.038, "dur": 2.722, + "args": { + "External id": 987230,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940383833.839, "dur": 0.757, + "args": { + "External id": 987231,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940383838.830, "dur": 45.521, + "args": { + "External id": 987232,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940383839.711, "dur": 43.860, + "args": { + "External id": 987233,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940383890.237, "dur": 4.439, + "args": { + "External id": 987234,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940383892.551, "dur": 0.706, + "args": { + "External id": 987235,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940383900.892, "dur": 1.647, + "args": { + "External id": 987236,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940383912.112, "dur": 9.344, + "args": { + "External id": 987237,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940383914.376, "dur": 6.685, + "args": { + "External id": 987238,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940384050.836, "dur": 273.624, + "args": { + "External id": 987239,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940384091.711, "dur": 5.295, + "args": { + "External id": 987240,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940384099.346, "dur": 224.487, + "args": { + "External id": 987241,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345940384101.106, "dur": 0.473, + "args": { + "External id": 987242,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345940384103.247, "dur": 33.209, + "args": { + "External id": 987243,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345940384138.701, "dur": 4.177, + "args": { + "External id": 987244,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940384141.355, "dur": 1.187, + "args": { + "External id": 987245,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940384144.084, "dur": 30.502, + "args": { + "External id": 987246,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940384145.572, "dur": 1.604, + "args": { + "External id": 987247,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940384150.940, "dur": 23.240, + "args": { + "External id": 987248,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940384156.007, "dur": 4.172, + "args": { + "External id": 987249,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345940384176.145, "dur": 25.179, + "args": { + "External id": 987250,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345940384203.140, "dur": 16.320, + "args": { + "External id": 987251,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345940384223.176, "dur": 15.568, + "args": { + "External id": 987252,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345940384240.284, "dur": 15.788, + "args": { + "External id": 987253,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345940384258.572, "dur": 26.433, + "args": { + "External id": 987254,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940384260.972, "dur": 1.682, + "args": { + "External id": 987255,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940384267.706, "dur": 1.067, + "args": { + "External id": 987256,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345940384293.729, "dur": 15.202, + "args": { + "External id": 987257,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940384310.563, "dur": 11.900, + "args": { + "External id": 987258,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940384334.576, "dur": 2.750, + "args": { + "External id": 987259,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940384349.204, "dur": 5.062, + "args": { + "External id": 987260,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940384352.636, "dur": 0.684, + "args": { + "External id": 987261,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940384439.644, "dur": 70.297, + "args": { + "External id": 987262,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940384515.885, "dur": 10.001, + "args": { + "External id": 987263,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940384521.906, "dur": 2.754, + "args": { + "External id": 987264,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940384527.812, "dur": 29.756, + "args": { + "External id": 987265,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940384563.206, "dur": 6.532, + "args": { + "External id": 987266,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940384565.137, "dur": 3.758, + "args": { + "External id": 987267,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940384567.545, "dur": 1.113, + "args": { + "External id": 987268,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940384572.917, "dur": 48.502, + "args": { + "External id": 987269,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940384576.719, "dur": 44.018, + "args": { + "External id": 987270,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940384626.098, "dur": 15.474, + "args": { + "External id": 987271,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940384648.879, "dur": 3.957, + "args": { + "External id": 987272,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940384651.425, "dur": 0.402, + "args": { + "External id": 987273,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940384657.564, "dur": 56.111, + "args": { + "External id": 987274,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940384658.565, "dur": 10.665, + "args": { + "External id": 987275,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940384663.196, "dur": 5.279, + "args": { + "External id": 987276,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940384667.577, "dur": 0.718, + "args": { + "External id": 987277,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940384670.134, "dur": 43.133, + "args": { + "External id": 987278,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940384670.688, "dur": 41.914, + "args": { + "External id": 987279,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940384718.732, "dur": 4.510, + "args": { + "External id": 987280,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940384721.214, "dur": 0.507, + "args": { + "External id": 987281,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940384729.801, "dur": 1.517, + "args": { + "External id": 987282,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940384742.601, "dur": 10.055, + "args": { + "External id": 987283,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940384745.076, "dur": 7.232, + "args": { + "External id": 987284,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940384849.147, "dur": 258.189, + "args": { + "External id": 987285,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940384851.560, "dur": 2.160, + "args": { + "External id": 987286,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940384855.248, "dur": 251.394, + "args": { + "External id": 987287,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345940384856.579, "dur": 0.442, + "args": { + "External id": 987288,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345940384860.936, "dur": 23.407, + "args": { + "External id": 987289,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345940384886.184, "dur": 3.616, + "args": { + "External id": 987290,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940384888.722, "dur": 0.874, + "args": { + "External id": 987291,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940384890.902, "dur": 24.527, + "args": { + "External id": 987292,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940384892.050, "dur": 1.653, + "args": { + "External id": 987293,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940384894.855, "dur": 20.246, + "args": { + "External id": 987294,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940384899.515, "dur": 2.805, + "args": { + "External id": 987295,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345940384916.969, "dur": 20.695, + "args": { + "External id": 987296,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345940384939.359, "dur": 12.971, + "args": { + "External id": 987297,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345940384955.486, "dur": 13.998, + "args": { + "External id": 987298,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345940384970.735, "dur": 11.678, + "args": { + "External id": 987299,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345940384986.769, "dur": 43.651, + "args": { + "External id": 987300,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940384989.061, "dur": 1.857, + "args": { + "External id": 987301,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940384993.283, "dur": 0.654, + "args": { + "External id": 987302,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345940385033.170, "dur": 16.296, + "args": { + "External id": 987303,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940385050.789, "dur": 53.454, + "args": { + "External id": 987304,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940385118.215, "dur": 2.393, + "args": { + "External id": 987305,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940385131.628, "dur": 4.948, + "args": { + "External id": 987306,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940385134.845, "dur": 0.767, + "args": { + "External id": 987307,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940385218.936, "dur": 70.940, + "args": { + "External id": 987308,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940385295.400, "dur": 6.034, + "args": { + "External id": 987309,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940385298.866, "dur": 1.320, + "args": { + "External id": 987310,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940385303.156, "dur": 31.880, + "args": { + "External id": 987311,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940385340.318, "dur": 9.052, + "args": { + "External id": 987312,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940385342.302, "dur": 6.170, + "args": { + "External id": 987313,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940385347.357, "dur": 0.860, + "args": { + "External id": 987314,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940385352.562, "dur": 47.544, + "args": { + "External id": 987315,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940385353.761, "dur": 45.563, + "args": { + "External id": 987316,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940385404.586, "dur": 18.020, + "args": { + "External id": 987317,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940385429.224, "dur": 4.621, + "args": { + "External id": 987318,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940385432.082, "dur": 0.702, + "args": { + "External id": 987319,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940385438.635, "dur": 57.368, + "args": { + "External id": 987320,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940385442.097, "dur": 5.787, + "args": { + "External id": 987321,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940385442.932, "dur": 4.247, + "args": { + "External id": 987322,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940385444.924, "dur": 2.037, + "args": { + "External id": 987323,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940385448.884, "dur": 46.706, + "args": { + "External id": 987324,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940385449.788, "dur": 45.155, + "args": { + "External id": 987325,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940385501.297, "dur": 4.443, + "args": { + "External id": 987326,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940385503.426, "dur": 0.724, + "args": { + "External id": 987327,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940385511.474, "dur": 1.556, + "args": { + "External id": 987328,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940385525.249, "dur": 7.341, + "args": { + "External id": 987329,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940385527.795, "dur": 4.400, + "args": { + "External id": 987330,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940385626.632, "dur": 208.231, + "args": { + "External id": 987331,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940385629.966, "dur": 2.105, + "args": { + "External id": 987332,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940385633.787, "dur": 200.543, + "args": { + "External id": 987333,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345940385635.331, "dur": 0.562, + "args": { + "External id": 987334,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345940385641.717, "dur": 27.853, + "args": { + "External id": 987335,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345940385671.450, "dur": 3.795, + "args": { + "External id": 987336,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940385674.183, "dur": 0.806, + "args": { + "External id": 987337,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940385676.419, "dur": 25.891, + "args": { + "External id": 987338,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940385677.428, "dur": 2.614, + "args": { + "External id": 987339,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940385681.238, "dur": 20.748, + "args": { + "External id": 987340,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940385684.420, "dur": 3.535, + "args": { + "External id": 987341,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345940385703.825, "dur": 22.226, + "args": { + "External id": 987342,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345940385727.764, "dur": 14.219, + "args": { + "External id": 987343,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345940385745.341, "dur": 14.450, + "args": { + "External id": 987344,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345940385763.595, "dur": 14.788, + "args": { + "External id": 987345,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345940385780.538, "dur": 23.581, + "args": { + "External id": 987346,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940385782.951, "dur": 1.630, + "args": { + "External id": 987347,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940385786.825, "dur": 0.586, + "args": { + "External id": 987348,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345940385805.511, "dur": 14.503, + "args": { + "External id": 987349,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940385821.171, "dur": 12.054, + "args": { + "External id": 987350,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940385844.410, "dur": 1.717, + "args": { + "External id": 987351,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940385855.266, "dur": 4.651, + "args": { + "External id": 987352,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940385858.449, "dur": 0.482, + "args": { + "External id": 987353,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940385930.938, "dur": 56.010, + "args": { + "External id": 987354,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940385992.377, "dur": 5.225, + "args": { + "External id": 987355,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940385995.722, "dur": 0.677, + "args": { + "External id": 987356,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940385999.177, "dur": 46.815, + "args": { + "External id": 987357,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940386089.480, "dur": 12.692, + "args": { + "External id": 987358,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940386094.940, "dur": 6.092, + "args": { + "External id": 987359,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940386097.552, "dur": 2.930, + "args": { + "External id": 987360,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940386106.321, "dur": 63.046, + "args": { + "External id": 987361,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940386107.671, "dur": 60.528, + "args": { + "External id": 987362,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940386174.399, "dur": 17.393, + "args": { + "External id": 987363,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940386199.770, "dur": 7.051, + "args": { + "External id": 987364,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940386204.877, "dur": 0.623, + "args": { + "External id": 987365,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940386211.349, "dur": 49.628, + "args": { + "External id": 987366,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940386212.441, "dur": 4.268, + "args": { + "External id": 987367,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940386213.379, "dur": 2.607, + "args": { + "External id": 987368,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940386215.136, "dur": 0.679, + "args": { + "External id": 987369,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940386217.497, "dur": 43.086, + "args": { + "External id": 987370,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940386218.213, "dur": 41.695, + "args": { + "External id": 987371,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940386268.051, "dur": 4.496, + "args": { + "External id": 987372,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940386270.463, "dur": 0.406, + "args": { + "External id": 987373,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940386279.549, "dur": 1.550, + "args": { + "External id": 987374,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940386290.997, "dur": 9.891, + "args": { + "External id": 987375,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940386293.541, "dur": 7.032, + "args": { + "External id": 987376,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940386406.906, "dur": 213.081, + "args": { + "External id": 987377,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940386409.678, "dur": 2.153, + "args": { + "External id": 987378,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940386414.796, "dur": 204.570, + "args": { + "External id": 987379,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345940386416.230, "dur": 0.512, + "args": { + "External id": 987380,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345940386418.810, "dur": 25.683, + "args": { + "External id": 987381,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345940386446.872, "dur": 5.249, + "args": { + "External id": 987382,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940386449.653, "dur": 2.135, + "args": { + "External id": 987383,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940386453.366, "dur": 26.695, + "args": { + "External id": 987384,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940386454.601, "dur": 1.379, + "args": { + "External id": 987385,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940386459.907, "dur": 19.751, + "args": { + "External id": 987386,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940386462.979, "dur": 2.833, + "args": { + "External id": 987387,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345940386481.792, "dur": 23.806, + "args": { + "External id": 987388,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345940386507.438, "dur": 15.621, + "args": { + "External id": 987389,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345940386525.955, "dur": 16.039, + "args": { + "External id": 987390,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345940386543.605, "dur": 14.556, + "args": { + "External id": 987391,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345940386562.652, "dur": 25.424, + "args": { + "External id": 987392,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940386566.587, "dur": 1.910, + "args": { + "External id": 987393,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940386570.841, "dur": 0.518, + "args": { + "External id": 987394,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345940386592.087, "dur": 13.680, + "args": { + "External id": 987395,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940386607.101, "dur": 11.031, + "args": { + "External id": 987396,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940386627.707, "dur": 1.894, + "args": { + "External id": 987397,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940386639.336, "dur": 3.814, + "args": { + "External id": 987398,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940386641.716, "dur": 0.559, + "args": { + "External id": 987399,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940386716.905, "dur": 59.436, + "args": { + "External id": 987400,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940386782.335, "dur": 5.173, + "args": { + "External id": 987401,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940386785.527, "dur": 0.707, + "args": { + "External id": 987402,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940386791.548, "dur": 31.311, + "args": { + "External id": 987403,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940386827.879, "dur": 5.588, + "args": { + "External id": 987404,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940386829.436, "dur": 3.245, + "args": { + "External id": 987405,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940386831.261, "dur": 1.236, + "args": { + "External id": 987406,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940386835.947, "dur": 45.088, + "args": { + "External id": 987407,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940386836.947, "dur": 43.408, + "args": { + "External id": 987408,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940386884.946, "dur": 14.860, + "args": { + "External id": 987409,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345940386906.960, "dur": 24.850, + "args": { + "External id": 987410,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 20310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345940386909.911, "dur": 21.463, + "args": { + "External id": 987411,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940386915.535, "dur": 0.826, + "args": { + "External id": 987412,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 20312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345940386937.695, "dur": 31.679, + "args": { + "External id": 987413,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 20313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345940386940.116, "dur": 29.006, + "args": { + "External id": 987414,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], [], []], "Ev Idx": 20314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940386945.695, "dur": 3.741, + "args": { + "External id": 987415,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940386950.621, "dur": 17.856, + "args": { + "External id": 987416,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2338710, + "ts": 6345940386981.393, "dur": 9.134, + "args": { + "External id": 987417,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 20317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2338710, + "ts": 6345940386986.955, "dur": 3.217, + "args": { + "External id": 987418,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 20318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2338710, + "ts": 6345940386991.924, "dur": 1.626, + "args": { + "External id": 987419,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2338710, + "ts": 6345940386992.700, "dur": 0.759, + "args": { + "External id": 987420,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940387107.868, "dur": 34.465, + "args": { + "External id": 987421,"Sequence number": 10552687, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 20321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940387144.782, "dur": 15.570, + "args": { + "External id": 987422,"Sequence number": 10552688, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 20322 + } + }, + { + "ph": "s", "id": 233, "pid": 2338710, "tid": 2338710, "ts": 6345940387144.782, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940387170.951, "dur": 8.522, + "args": { + "External id": 987423,"Sequence number": 10552689, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4, 4096], [], [], [], []], "Ev Idx": 20323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940387175.702, "dur": 1.879, + "args": { + "External id": 987424,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 20324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338710, "tid": 2338710, + "ts": 6345940387182.287, "dur": 7.215, + "args": { + "External id": 987425,"Sequence number": 10552689, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "3"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4, 4096], [], []], "Ev Idx": 20325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940387187.377, "dur": 0.680, + "args": { + "External id": 987426,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "4"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 20326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940387191.162, "dur": 2.975, + "args": { + "External id": 987427,"Sequence number": 10552689, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 20327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940387192.922, "dur": 0.479, + "args": { + "External id": 987428,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "4"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 20328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940387198.857, "dur": 9.134, + "args": { + "External id": 987429,"Sequence number": 10552689, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 20329 + } + }, + { + "ph": "s", "id": 232, "pid": 2338710, "tid": 2338710, "ts": 6345940387198.857, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940387205.276, "dur": 1.140, + "args": { + "External id": 987430,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 20330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940387209.212, "dur": 4.881, + "args": { + "External id": 987431,"Sequence number": 10552690, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 20331 + } + }, + { + "ph": "s", "id": 231, "pid": 2338710, "tid": 2338710, "ts": 6345940387209.212, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940387212.804, "dur": 0.324, + "args": { + "External id": 987432,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 20332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338710, "tid": 2338710, + "ts": 6345940387215.283, "dur": 10.610, + "args": { + "External id": 987433,"Sequence number": 10552691, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "3"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 20333 + } + }, + { + "ph": "s", "id": 230, "pid": 2338710, "tid": 2338710, "ts": 6345940387215.283, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940387222.281, "dur": 2.348, + "args": { + "External id": 987434,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "12288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 20334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940387227.057, "dur": 5.136, + "args": { + "External id": 987435,"Sequence number": 10552692, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 20335 + } + }, + { + "ph": "s", "id": 229, "pid": 2338710, "tid": 2338710, "ts": 6345940387227.057, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940387230.555, "dur": 0.662, + "args": { + "External id": 987436,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "12288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 20336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338710, "tid": 2338710, + "ts": 6345940387236.724, "dur": 41.933, + "args": { + "External id": 987437,"Sequence number": 10552693, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 20337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338710, "tid": 2338710, + "ts": 6345940387238.753, "dur": 39.651, + "args": { + "External id": 987438,"Sequence number": 10552693, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 20338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940387241.739, "dur": 11.143, + "args": { + "External id": 987439,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 20339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940387244.542, "dur": 7.443, + "args": { + "External id": 987440,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940387254.040, "dur": 23.827, + "args": { + "External id": 987441,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 20341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940387310.164, "dur": 6.688, + "args": { + "External id": 987442,"Sequence number": 10552693, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 20342 + } + }, + { + "ph": "s", "id": 228, "pid": 2338710, "tid": 2338710, "ts": 6345940387310.164, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940387319.876, "dur": 1.148, + "args": { + "External id": 987443,"Sequence number": 10552694, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 20343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 2338710, "tid": 2338710, + "ts": 6345940387358.859, "dur": 46267.471, + "args": { + "External id": 987444,"Sequence number": 10552694, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16384, 1], [1], [4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768], [32000, 4096], [], [], [], [], []], "Ev Idx": 20344 + } + }, + { + "ph": "s", "id": 227, "pid": 2338710, "tid": 2338710, "ts": 6345940387358.859, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338710, "tid": 2338710, + "ts": 6345940387377.827, "dur": 29.250, + "args": { + "External id": 987445,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 20345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338710, "tid": 2338710, + "ts": 6345940387378.861, "dur": 27.984, + "args": { + "External id": 987446,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 20346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940387381.152, "dur": 6.149, + "args": { + "External id": 987447,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[16384, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 20347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940387383.063, "dur": 3.671, + "args": { + "External id": 987448,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940387388.036, "dur": 18.356, + "args": { + "External id": 987449,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [16384, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 20349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940387426.053, "dur": 31.797, + "args": { + "External id": 987450,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 20350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940387427.736, "dur": 9.318, + "args": { + "External id": 987451,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 20351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940387432.426, "dur": 4.264, + "args": { + "External id": 987452,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2338710, + "ts": 6345940387438.631, "dur": 18.950, + "args": { + "External id": 987453,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 20353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345940387441.182, "dur": 15.919, + "args": { + "External id": 987454,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 20354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940387461.682, "dur": 22.545, + "args": { + "External id": 987455,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 20355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940387462.564, "dur": 4.666, + "args": { + "External id": 987456,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 20356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940387463.913, "dur": 3.061, + "args": { + "External id": 987457,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2338710, + "ts": 6345940387468.014, "dur": 15.936, + "args": { + "External id": 987458,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345940387468.648, "dur": 14.805, + "args": { + "External id": 987459,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 20359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338710, "tid": 2338710, + "ts": 6345940387492.133, "dur": 21.985, + "args": { + "External id": 987460,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 20360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940387496.071, "dur": 5.099, + "args": { + "External id": 987461,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2338710, + "ts": 6345940387501.982, "dur": 11.674, + "args": { + "External id": 987462,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[32768]], "Ev Idx": 20362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345940387503.020, "dur": 10.195, + "args": { + "External id": 987463,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 20363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338710, "tid": 2338710, + "ts": 6345940387519.322, "dur": 24.371, + "args": { + "External id": 987464,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 20364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345940387546.942, "dur": 54.991, + "args": { + "External id": 987465,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 20365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345940387549.468, "dur": 51.949, + "args": { + "External id": 987466,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940387554.427, "dur": 0.887, + "args": { + "External id": 987467,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 20367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345940387556.854, "dur": 27.805, + "args": { + "External id": 987468,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345940387561.300, "dur": 23.117, + "args": { + "External id": 987469,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[32768], [], [], [], [], [], []], "Ev Idx": 20369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940387564.278, "dur": 3.092, + "args": { + "External id": 987470,"Record function id": 0, "Concrete Inputs": ["[32768]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940387570.660, "dur": 13.303, + "args": { + "External id": 987471,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[32768], [32768], []], "Ev Idx": 20371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338710, "tid": 2338710, + "ts": 6345940387606.060, "dur": 39267.697, + "args": { + "External id": 987472,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 20372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338710, "tid": 2338710, + "ts": 6345940387607.785, "dur": 39263.886, + "args": { + "External id": 987473,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 20373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940426891.503, "dur": 9.163, + "args": { + "External id": 987474,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940426896.915, "dur": 1.554, + "args": { + "External id": 987475,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940426907.324, "dur": 176.145, + "args": { + "External id": 987476,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940426909.243, "dur": 9.786, + "args": { + "External id": 987477,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940426914.568, "dur": 3.458, + "args": { + "External id": 987478,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940426916.912, "dur": 0.816, + "args": { + "External id": 987479,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940426920.553, "dur": 160.859, + "args": { + "External id": 987480,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940426922.541, "dur": 156.951, + "args": { + "External id": 987481,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940427090.704, "dur": 9.023, + "args": { + "External id": 987482,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940427095.754, "dur": 0.943, + "args": { + "External id": 987483,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940427110.755, "dur": 3.718, + "args": { + "External id": 987484,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940427130.084, "dur": 9.469, + "args": { + "External id": 987485,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940427133.712, "dur": 5.501, + "args": { + "External id": 987486,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940427319.090, "dur": 249.428, + "args": { + "External id": 987487,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940427323.709, "dur": 5.310, + "args": { + "External id": 987488,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940427332.481, "dur": 235.397, + "args": { + "External id": 987489,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345940427334.644, "dur": 0.497, + "args": { + "External id": 987490,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345940427340.705, "dur": 35.453, + "args": { + "External id": 987491,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345940427379.512, "dur": 7.158, + "args": { + "External id": 987492,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940427385.479, "dur": 0.819, + "args": { + "External id": 987493,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940427387.967, "dur": 28.198, + "args": { + "External id": 987494,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940427389.650, "dur": 1.088, + "args": { + "External id": 987495,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940427392.565, "dur": 23.265, + "args": { + "External id": 987496,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940427397.173, "dur": 4.199, + "args": { + "External id": 987497,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345940427418.030, "dur": 26.253, + "args": { + "External id": 987498,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345940427446.970, "dur": 16.902, + "args": { + "External id": 987499,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345940427467.661, "dur": 17.750, + "args": { + "External id": 987500,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345940427490.183, "dur": 15.399, + "args": { + "External id": 987501,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345940427507.984, "dur": 26.989, + "args": { + "External id": 987502,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940427513.101, "dur": 2.257, + "args": { + "External id": 987503,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940427518.385, "dur": 0.756, + "args": { + "External id": 987504,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345940427537.055, "dur": 14.045, + "args": { + "External id": 987505,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940427552.872, "dur": 13.620, + "args": { + "External id": 987506,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940427579.949, "dur": 2.303, + "args": { + "External id": 987507,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940427591.792, "dur": 5.330, + "args": { + "External id": 987508,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940427595.571, "dur": 0.603, + "args": { + "External id": 987509,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940427686.962, "dur": 89.098, + "args": { + "External id": 987510,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940427783.393, "dur": 8.732, + "args": { + "External id": 987511,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940427787.596, "dur": 1.388, + "args": { + "External id": 987512,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940427793.902, "dur": 31.952, + "args": { + "External id": 987513,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940427832.648, "dur": 12.290, + "args": { + "External id": 987514,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940427838.636, "dur": 5.619, + "args": { + "External id": 987515,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940427841.553, "dur": 2.433, + "args": { + "External id": 987516,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940427848.853, "dur": 54.341, + "args": { + "External id": 987517,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940427850.273, "dur": 52.111, + "args": { + "External id": 987518,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940427908.580, "dur": 18.189, + "args": { + "External id": 987519,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940427934.146, "dur": 8.277, + "args": { + "External id": 987520,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940427940.688, "dur": 0.575, + "args": { + "External id": 987521,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940427948.389, "dur": 54.888, + "args": { + "External id": 987522,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940427949.597, "dur": 6.987, + "args": { + "External id": 987523,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940427950.650, "dur": 5.233, + "args": { + "External id": 987524,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940427952.338, "dur": 3.363, + "args": { + "External id": 987525,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940427957.493, "dur": 45.232, + "args": { + "External id": 987526,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940427958.167, "dur": 44.000, + "args": { + "External id": 987527,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940428034.397, "dur": 7.416, + "args": { + "External id": 987528,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940428038.954, "dur": 0.737, + "args": { + "External id": 987529,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940428049.577, "dur": 2.137, + "args": { + "External id": 987530,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940428104.573, "dur": 13.050, + "args": { + "External id": 987531,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940428108.063, "dur": 8.851, + "args": { + "External id": 987532,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940428245.057, "dur": 222.662, + "args": { + "External id": 987533,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940428248.832, "dur": 2.308, + "args": { + "External id": 987534,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940428253.503, "dur": 213.580, + "args": { + "External id": 987535,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345940428255.581, "dur": 0.547, + "args": { + "External id": 987536,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345940428257.523, "dur": 29.753, + "args": { + "External id": 987537,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345940428289.298, "dur": 3.787, + "args": { + "External id": 987538,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940428291.689, "dur": 0.910, + "args": { + "External id": 987539,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940428294.180, "dur": 30.493, + "args": { + "External id": 987540,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940428296.018, "dur": 1.616, + "args": { + "External id": 987541,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940428301.283, "dur": 23.051, + "args": { + "External id": 987542,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940428306.164, "dur": 3.122, + "args": { + "External id": 987543,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345940428326.913, "dur": 26.316, + "args": { + "External id": 987544,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345940428355.194, "dur": 16.641, + "args": { + "External id": 987545,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345940428375.335, "dur": 15.934, + "args": { + "External id": 987546,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345940428392.977, "dur": 14.479, + "args": { + "External id": 987547,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345940428409.438, "dur": 24.380, + "args": { + "External id": 987548,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940428412.572, "dur": 2.696, + "args": { + "External id": 987549,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940428417.751, "dur": 0.750, + "args": { + "External id": 987550,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345940428438.103, "dur": 13.994, + "args": { + "External id": 987551,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940428453.511, "dur": 12.170, + "args": { + "External id": 987552,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940428476.713, "dur": 2.396, + "args": { + "External id": 987553,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940428490.845, "dur": 5.757, + "args": { + "External id": 987554,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940428493.876, "dur": 1.557, + "args": { + "External id": 987555,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940428576.438, "dur": 70.625, + "args": { + "External id": 987556,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940428652.932, "dur": 5.562, + "args": { + "External id": 987557,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940428656.351, "dur": 0.871, + "args": { + "External id": 987558,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940428662.724, "dur": 30.817, + "args": { + "External id": 987559,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940428698.781, "dur": 5.702, + "args": { + "External id": 987560,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940428700.635, "dur": 3.096, + "args": { + "External id": 987561,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940428702.565, "dur": 0.913, + "args": { + "External id": 987562,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940428707.576, "dur": 49.038, + "args": { + "External id": 987563,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940428709.043, "dur": 46.908, + "args": { + "External id": 987564,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940428761.100, "dur": 18.207, + "args": { + "External id": 987565,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940428788.091, "dur": 4.182, + "args": { + "External id": 987566,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940428790.496, "dur": 0.804, + "args": { + "External id": 987567,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940428796.884, "dur": 56.741, + "args": { + "External id": 987568,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940428797.878, "dur": 5.861, + "args": { + "External id": 987569,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940428798.709, "dur": 4.355, + "args": { + "External id": 987570,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940428800.279, "dur": 2.519, + "args": { + "External id": 987571,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940428807.028, "dur": 46.110, + "args": { + "External id": 987572,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940428807.935, "dur": 44.359, + "args": { + "External id": 987573,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940428859.078, "dur": 4.501, + "args": { + "External id": 987574,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940428861.537, "dur": 0.545, + "args": { + "External id": 987575,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940428869.409, "dur": 1.480, + "args": { + "External id": 987576,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940428879.295, "dur": 7.034, + "args": { + "External id": 987577,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940428881.290, "dur": 4.722, + "args": { + "External id": 987578,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940428989.714, "dur": 281.256, + "args": { + "External id": 987579,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940428992.386, "dur": 2.251, + "args": { + "External id": 987580,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940428996.430, "dur": 273.968, + "args": { + "External id": 987581,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345940428997.703, "dur": 0.527, + "args": { + "External id": 987582,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345940429000.635, "dur": 47.401, + "args": { + "External id": 987583,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345940429051.341, "dur": 42.586, + "args": { + "External id": 987584,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940429091.072, "dur": 1.093, + "args": { + "External id": 987585,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940429095.082, "dur": 36.088, + "args": { + "External id": 987586,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940429096.798, "dur": 4.271, + "args": { + "External id": 987587,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940429105.122, "dur": 25.695, + "args": { + "External id": 987588,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940429108.659, "dur": 4.085, + "args": { + "External id": 987589,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345940429132.834, "dur": 26.822, + "args": { + "External id": 987590,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345940429161.752, "dur": 16.146, + "args": { + "External id": 987591,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345940429181.367, "dur": 17.097, + "args": { + "External id": 987592,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345940429200.324, "dur": 14.980, + "args": { + "External id": 987593,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345940429217.304, "dur": 25.164, + "args": { + "External id": 987594,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940429220.034, "dur": 1.633, + "args": { + "External id": 987595,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940429226.291, "dur": 0.657, + "args": { + "External id": 987596,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345940429243.971, "dur": 12.389, + "args": { + "External id": 987597,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940429257.562, "dur": 11.358, + "args": { + "External id": 987598,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940429280.245, "dur": 3.047, + "args": { + "External id": 987599,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940429294.517, "dur": 5.830, + "args": { + "External id": 987600,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940429297.687, "dur": 0.465, + "args": { + "External id": 987601,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940429385.377, "dur": 75.086, + "args": { + "External id": 987602,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940429466.187, "dur": 7.953, + "args": { + "External id": 987603,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940429472.013, "dur": 0.911, + "args": { + "External id": 987604,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940429475.694, "dur": 33.099, + "args": { + "External id": 987605,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940429514.140, "dur": 7.765, + "args": { + "External id": 987606,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940429515.971, "dur": 5.154, + "args": { + "External id": 987607,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940429518.078, "dur": 2.807, + "args": { + "External id": 987608,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940429525.040, "dur": 51.863, + "args": { + "External id": 987609,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940429528.405, "dur": 47.733, + "args": { + "External id": 987610,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940429581.721, "dur": 15.664, + "args": { + "External id": 987611,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940429604.394, "dur": 4.447, + "args": { + "External id": 987612,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940429606.965, "dur": 0.759, + "args": { + "External id": 987613,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940429613.360, "dur": 50.539, + "args": { + "External id": 987614,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940429614.460, "dur": 6.552, + "args": { + "External id": 987615,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940429615.586, "dur": 4.690, + "args": { + "External id": 987616,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940429619.438, "dur": 0.604, + "args": { + "External id": 987617,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940429621.830, "dur": 41.629, + "args": { + "External id": 987618,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940429622.439, "dur": 40.411, + "args": { + "External id": 987619,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940429668.279, "dur": 4.633, + "args": { + "External id": 987620,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940429670.750, "dur": 0.891, + "args": { + "External id": 987621,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940429678.992, "dur": 1.723, + "args": { + "External id": 987622,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940429692.511, "dur": 8.656, + "args": { + "External id": 987623,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940429694.797, "dur": 6.028, + "args": { + "External id": 987624,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940429795.999, "dur": 199.683, + "args": { + "External id": 987625,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940429801.610, "dur": 2.245, + "args": { + "External id": 987626,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940429807.744, "dur": 187.364, + "args": { + "External id": 987627,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345940429809.227, "dur": 0.621, + "args": { + "External id": 987628,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345940429813.782, "dur": 22.472, + "args": { + "External id": 987629,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345940429838.090, "dur": 4.984, + "args": { + "External id": 987630,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940429840.466, "dur": 2.363, + "args": { + "External id": 987631,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940429844.075, "dur": 21.642, + "args": { + "External id": 987632,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940429845.870, "dur": 1.129, + "args": { + "External id": 987633,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940429848.054, "dur": 17.360, + "args": { + "External id": 987634,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940429850.587, "dur": 2.311, + "args": { + "External id": 987635,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345940429867.075, "dur": 21.318, + "args": { + "External id": 987636,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345940429890.020, "dur": 15.599, + "args": { + "External id": 987637,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345940429908.887, "dur": 14.542, + "args": { + "External id": 987638,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345940429924.858, "dur": 14.535, + "args": { + "External id": 987639,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345940429943.754, "dur": 22.344, + "args": { + "External id": 987640,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940429945.932, "dur": 1.540, + "args": { + "External id": 987641,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940429949.831, "dur": 0.502, + "args": { + "External id": 987642,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345940429967.943, "dur": 13.717, + "args": { + "External id": 987643,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940429983.074, "dur": 10.625, + "args": { + "External id": 987644,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940430002.566, "dur": 1.834, + "args": { + "External id": 987645,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940430035.072, "dur": 6.652, + "args": { + "External id": 987646,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940430039.553, "dur": 0.886, + "args": { + "External id": 987647,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940430166.929, "dur": 68.231, + "args": { + "External id": 987648,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940430262.826, "dur": 8.933, + "args": { + "External id": 987649,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940430267.244, "dur": 2.463, + "args": { + "External id": 987650,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940430273.744, "dur": 31.144, + "args": { + "External id": 987651,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940430311.188, "dur": 8.749, + "args": { + "External id": 987652,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940430313.021, "dur": 6.045, + "args": { + "External id": 987653,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940430317.925, "dur": 0.782, + "args": { + "External id": 987654,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940430322.927, "dur": 48.265, + "args": { + "External id": 987655,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940430323.963, "dur": 46.459, + "args": { + "External id": 987656,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940430375.931, "dur": 17.241, + "args": { + "External id": 987657,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940430399.954, "dur": 3.684, + "args": { + "External id": 987658,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940430402.267, "dur": 0.420, + "args": { + "External id": 987659,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940430410.969, "dur": 48.995, + "args": { + "External id": 987660,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940430411.733, "dur": 3.948, + "args": { + "External id": 987661,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940430412.503, "dur": 2.518, + "args": { + "External id": 987662,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940430414.236, "dur": 0.598, + "args": { + "External id": 987663,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940430416.449, "dur": 43.025, + "args": { + "External id": 987664,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940430416.938, "dur": 41.860, + "args": { + "External id": 987665,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940430465.628, "dur": 4.096, + "args": { + "External id": 987666,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940430467.937, "dur": 0.444, + "args": { + "External id": 987667,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940430479.096, "dur": 1.749, + "args": { + "External id": 987668,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940430490.581, "dur": 8.129, + "args": { + "External id": 987669,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940430493.030, "dur": 5.373, + "args": { + "External id": 987670,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940430605.119, "dur": 224.410, + "args": { + "External id": 987671,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940430608.055, "dur": 3.987, + "args": { + "External id": 987672,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940430613.773, "dur": 215.249, + "args": { + "External id": 987673,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345940430617.659, "dur": 0.397, + "args": { + "External id": 987674,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345940430619.736, "dur": 25.053, + "args": { + "External id": 987675,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345940430646.384, "dur": 5.831, + "args": { + "External id": 987676,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940430651.255, "dur": 0.703, + "args": { + "External id": 987677,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940430653.455, "dur": 28.718, + "args": { + "External id": 987678,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940430654.493, "dur": 1.069, + "args": { + "External id": 987679,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940430656.983, "dur": 24.852, + "args": { + "External id": 987680,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940430660.440, "dur": 3.453, + "args": { + "External id": 987681,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345940430683.582, "dur": 27.502, + "args": { + "External id": 987682,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345940430712.916, "dur": 15.434, + "args": { + "External id": 987683,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345940430734.219, "dur": 17.643, + "args": { + "External id": 987684,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345940430753.416, "dur": 15.042, + "args": { + "External id": 987685,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345940430770.756, "dur": 24.926, + "args": { + "External id": 987686,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940430773.116, "dur": 2.157, + "args": { + "External id": 987687,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940430777.250, "dur": 2.887, + "args": { + "External id": 987688,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345940430797.441, "dur": 12.948, + "args": { + "External id": 987689,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940430814.455, "dur": 13.311, + "args": { + "External id": 987690,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940430837.809, "dur": 1.631, + "args": { + "External id": 987691,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940430849.696, "dur": 4.003, + "args": { + "External id": 987692,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940430852.389, "dur": 0.389, + "args": { + "External id": 987693,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940430929.290, "dur": 61.493, + "args": { + "External id": 987694,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940430996.876, "dur": 5.679, + "args": { + "External id": 987695,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940431000.477, "dur": 0.810, + "args": { + "External id": 987696,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940431004.037, "dur": 96.053, + "args": { + "External id": 987697,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940431110.580, "dur": 8.549, + "args": { + "External id": 987698,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940431112.981, "dur": 5.133, + "args": { + "External id": 987699,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940431115.824, "dur": 2.082, + "args": { + "External id": 987700,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940431122.607, "dur": 55.438, + "args": { + "External id": 987701,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940431123.622, "dur": 53.568, + "args": { + "External id": 987702,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940431183.280, "dur": 20.295, + "args": { + "External id": 987703,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940431211.703, "dur": 7.036, + "args": { + "External id": 987704,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940431217.124, "dur": 0.443, + "args": { + "External id": 987705,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940431223.157, "dur": 53.832, + "args": { + "External id": 987706,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940431224.250, "dur": 3.531, + "args": { + "External id": 987707,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940431224.973, "dur": 2.146, + "args": { + "External id": 987708,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940431226.432, "dur": 0.545, + "args": { + "External id": 987709,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940431228.619, "dur": 47.996, + "args": { + "External id": 987710,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940431231.370, "dur": 44.449, + "args": { + "External id": 987711,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940431282.078, "dur": 4.647, + "args": { + "External id": 987712,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940431284.657, "dur": 0.718, + "args": { + "External id": 987713,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940431294.099, "dur": 1.871, + "args": { + "External id": 987714,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940431305.259, "dur": 11.643, + "args": { + "External id": 987715,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940431309.960, "dur": 6.624, + "args": { + "External id": 987716,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940431422.657, "dur": 226.770, + "args": { + "External id": 987717,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940431427.581, "dur": 2.036, + "args": { + "External id": 987718,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940431437.104, "dur": 211.749, + "args": { + "External id": 987719,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345940431438.475, "dur": 0.714, + "args": { + "External id": 987720,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345940431442.370, "dur": 25.107, + "args": { + "External id": 987721,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345940431469.372, "dur": 4.282, + "args": { + "External id": 987722,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940431472.908, "dur": 0.502, + "args": { + "External id": 987723,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940431477.164, "dur": 24.382, + "args": { + "External id": 987724,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940431478.731, "dur": 1.106, + "args": { + "External id": 987725,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940431481.004, "dur": 20.262, + "args": { + "External id": 987726,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940431483.926, "dur": 3.035, + "args": { + "External id": 987727,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345940431502.978, "dur": 29.606, + "args": { + "External id": 987728,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345940431534.459, "dur": 19.471, + "args": { + "External id": 987729,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345940431556.989, "dur": 15.189, + "args": { + "External id": 987730,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345940431573.850, "dur": 16.282, + "args": { + "External id": 987731,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345940431591.995, "dur": 25.723, + "args": { + "External id": 987732,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940431594.404, "dur": 1.620, + "args": { + "External id": 987733,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940431600.641, "dur": 0.597, + "args": { + "External id": 987734,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345940431619.524, "dur": 14.858, + "args": { + "External id": 987735,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940431635.689, "dur": 12.005, + "args": { + "External id": 987736,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940431657.131, "dur": 1.898, + "args": { + "External id": 987737,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940431668.142, "dur": 3.789, + "args": { + "External id": 987738,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940431670.664, "dur": 0.364, + "args": { + "External id": 987739,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940431747.146, "dur": 57.133, + "args": { + "External id": 987740,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940431809.858, "dur": 7.848, + "args": { + "External id": 987741,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940431815.831, "dur": 0.586, + "args": { + "External id": 987742,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940431819.349, "dur": 27.282, + "args": { + "External id": 987743,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940431851.529, "dur": 6.153, + "args": { + "External id": 987744,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940431852.968, "dur": 3.783, + "args": { + "External id": 987745,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940431855.147, "dur": 1.418, + "args": { + "External id": 987746,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940431860.212, "dur": 49.012, + "args": { + "External id": 987747,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940431863.757, "dur": 44.493, + "args": { + "External id": 987748,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940431913.254, "dur": 17.225, + "args": { + "External id": 987749,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940431936.890, "dur": 4.275, + "args": { + "External id": 987750,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940431939.588, "dur": 0.502, + "args": { + "External id": 987751,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940431945.256, "dur": 52.049, + "args": { + "External id": 987752,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940431946.579, "dur": 6.510, + "args": { + "External id": 987753,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940431947.451, "dur": 5.028, + "args": { + "External id": 987754,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940431951.847, "dur": 0.481, + "args": { + "External id": 987755,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940431953.792, "dur": 43.113, + "args": { + "External id": 987756,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940431954.689, "dur": 41.446, + "args": { + "External id": 987757,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940432002.412, "dur": 25.333, + "args": { + "External id": 987758,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940432004.772, "dur": 20.527, + "args": { + "External id": 987759,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940432037.409, "dur": 1.901, + "args": { + "External id": 987760,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940432048.313, "dur": 52.861, + "args": { + "External id": 987761,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940432090.859, "dur": 9.669, + "args": { + "External id": 987762,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940432207.960, "dur": 220.161, + "args": { + "External id": 987763,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940432210.693, "dur": 2.583, + "args": { + "External id": 987764,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940432215.566, "dur": 212.126, + "args": { + "External id": 987765,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345940432217.565, "dur": 0.609, + "args": { + "External id": 987766,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345940432219.565, "dur": 29.017, + "args": { + "External id": 987767,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345940432250.506, "dur": 4.079, + "args": { + "External id": 987768,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940432253.174, "dur": 0.915, + "args": { + "External id": 987769,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940432258.169, "dur": 26.085, + "args": { + "External id": 987770,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940432259.773, "dur": 1.294, + "args": { + "External id": 987771,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940432262.663, "dur": 21.267, + "args": { + "External id": 987772,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940432267.768, "dur": 2.999, + "args": { + "External id": 987773,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345940432286.076, "dur": 23.717, + "args": { + "External id": 987774,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345940432311.560, "dur": 18.810, + "args": { + "External id": 987775,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345940432333.893, "dur": 17.751, + "args": { + "External id": 987776,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345940432353.052, "dur": 14.257, + "args": { + "External id": 987777,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345940432369.008, "dur": 25.729, + "args": { + "External id": 987778,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940432373.880, "dur": 1.380, + "args": { + "External id": 987779,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940432377.445, "dur": 0.687, + "args": { + "External id": 987780,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345940432399.773, "dur": 13.734, + "args": { + "External id": 987781,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940432414.673, "dur": 11.752, + "args": { + "External id": 987782,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940432436.608, "dur": 2.469, + "args": { + "External id": 987783,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940432451.417, "dur": 4.937, + "args": { + "External id": 987784,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940432454.609, "dur": 0.685, + "args": { + "External id": 987785,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940432526.347, "dur": 67.337, + "args": { + "External id": 987786,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940432599.381, "dur": 7.662, + "args": { + "External id": 987787,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940432602.778, "dur": 2.955, + "args": { + "External id": 987788,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940432608.773, "dur": 30.420, + "args": { + "External id": 987789,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940432644.090, "dur": 8.337, + "args": { + "External id": 987790,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940432645.702, "dur": 5.969, + "args": { + "External id": 987791,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940432650.459, "dur": 1.034, + "args": { + "External id": 987792,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940432655.281, "dur": 49.036, + "args": { + "External id": 987793,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940432656.441, "dur": 47.166, + "args": { + "External id": 987794,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940432708.501, "dur": 19.311, + "args": { + "External id": 987795,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940432734.551, "dur": 4.466, + "args": { + "External id": 987796,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940432737.267, "dur": 0.677, + "args": { + "External id": 987797,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338710, "tid": 2338710, + "ts": 6345940432745.880, "dur": 50.381, + "args": { + "External id": 987798,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940432746.704, "dur": 3.442, + "args": { + "External id": 987799,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940432747.380, "dur": 2.169, + "args": { + "External id": 987800,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940432748.849, "dur": 0.518, + "args": { + "External id": 987801,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940432750.972, "dur": 44.891, + "args": { + "External id": 987802,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940432751.908, "dur": 43.190, + "args": { + "External id": 987803,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940432803.075, "dur": 4.211, + "args": { + "External id": 987804,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940432805.574, "dur": 0.415, + "args": { + "External id": 987805,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940432813.268, "dur": 1.455, + "args": { + "External id": 987806,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940432823.589, "dur": 6.113, + "args": { + "External id": 987807,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940432825.636, "dur": 3.724, + "args": { + "External id": 987808,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940432924.865, "dur": 276.207, + "args": { + "External id": 987809,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940432927.041, "dur": 4.122, + "args": { + "External id": 987810,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338710, "tid": 2338710, + "ts": 6345940432933.015, "dur": 267.343, + "args": { + "External id": 987811,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338710, "tid": 2338710, + "ts": 6345940432934.395, "dur": 0.596, + "args": { + "External id": 987812,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338710, "tid": 2338710, + "ts": 6345940432936.113, "dur": 23.987, + "args": { + "External id": 987813,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338710, "tid": 2338710, + "ts": 6345940432961.880, "dur": 4.791, + "args": { + "External id": 987814,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940432965.611, "dur": 0.873, + "args": { + "External id": 987815,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940432967.631, "dur": 26.473, + "args": { + "External id": 987816,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345940432969.235, "dur": 1.419, + "args": { + "External id": 987817,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345940432971.809, "dur": 21.979, + "args": { + "External id": 987818,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940432977.266, "dur": 2.579, + "args": { + "External id": 987819,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345940432995.430, "dur": 42.598, + "args": { + "External id": 987820,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345940433040.696, "dur": 59.954, + "args": { + "External id": 987821,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338710, "tid": 2338710, + "ts": 6345940433105.431, "dur": 17.645, + "args": { + "External id": 987822,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338710, "tid": 2338710, + "ts": 6345940433124.703, "dur": 17.746, + "args": { + "External id": 987823,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345940433145.256, "dur": 24.813, + "args": { + "External id": 987824,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345940433147.563, "dur": 1.934, + "args": { + "External id": 987825,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940433152.083, "dur": 2.640, + "args": { + "External id": 987826,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338710, "tid": 2338710, + "ts": 6345940433174.643, "dur": 12.637, + "args": { + "External id": 987827,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940433188.292, "dur": 10.574, + "args": { + "External id": 987828,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345940433210.142, "dur": 2.463, + "args": { + "External id": 987829,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940433222.645, "dur": 4.619, + "args": { + "External id": 987830,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940433225.833, "dur": 0.425, + "args": { + "External id": 987831,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940433308.702, "dur": 70.436, + "args": { + "External id": 987832,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338710, "tid": 2338710, + "ts": 6345940433384.742, "dur": 5.517, + "args": { + "External id": 987833,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940433388.141, "dur": 0.704, + "args": { + "External id": 987834,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940433391.799, "dur": 29.632, + "args": { + "External id": 987835,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338710, "tid": 2338710, + "ts": 6345940433428.952, "dur": 7.041, + "args": { + "External id": 987836,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338710, "tid": 2338710, + "ts": 6345940433430.893, "dur": 4.212, + "args": { + "External id": 987837,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940433433.397, "dur": 1.513, + "args": { + "External id": 987838,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338710, "tid": 2338710, + "ts": 6345940433438.980, "dur": 48.178, + "args": { + "External id": 987839,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338710, "tid": 2338710, + "ts": 6345940433440.345, "dur": 45.999, + "args": { + "External id": 987840,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940433491.322, "dur": 15.709, + "args": { + "External id": 987841,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345940433512.099, "dur": 27.749, + "args": { + "External id": 987842,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 20742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338710, "tid": 2338710, + "ts": 6345940433517.801, "dur": 21.605, + "args": { + "External id": 987843,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940433523.077, "dur": 0.792, + "args": { + "External id": 987844,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 20744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345940433545.964, "dur": 29.291, + "args": { + "External id": 987845,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 20745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338710, "tid": 2338710, + "ts": 6345940433548.111, "dur": 26.889, + "args": { + "External id": 987846,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], [], []], "Ev Idx": 20746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940433553.659, "dur": 4.011, + "args": { + "External id": 987847,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345940433558.982, "dur": 15.448, + "args": { + "External id": 987848,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2338710, + "ts": 6345940433587.231, "dur": 6.982, + "args": { + "External id": 987849,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 20749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2338710, + "ts": 6345940433589.678, "dur": 4.023, + "args": { + "External id": 987850,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 20750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338710, "tid": 2338710, + "ts": 6345940433595.658, "dur": 3.898, + "args": { + "External id": 987851,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338710, "tid": 2338710, + "ts": 6345940433598.919, "dur": 0.552, + "args": { + "External id": 987852,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940433646.844, "dur": 26.021, + "args": { + "External id": 987853,"Sequence number": 10552695, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 20753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338710, "tid": 2338710, + "ts": 6345940433675.049, "dur": 14.907, + "args": { + "External id": 987854,"Sequence number": 10552696, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 20754 + } + }, + { + "ph": "s", "id": 226, "pid": 2338710, "tid": 2338710, "ts": 6345940433675.049, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward", "pid": 2338710, "tid": 2338710, + "ts": 6345940433814.897, "dur": 49.649, + "args": { + "External id": 987855,"Record function id": 0, "Ev Idx": 20755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2338710, "tid": 2338710, + "ts": 6345940433980.789, "dur": 63.347, + "args": { + "External id": 987856,"Sequence number": 10552697, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 20756 + } + }, + { + "ph": "s", "id": 225, "pid": 2338710, "tid": 2338710, "ts": 6345940433980.789, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ones_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940434130.574, "dur": 37.567, + "args": { + "External id": 987857,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "1"], "Input type": ["float", "", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[1], [], [], [], [], []], "Ev Idx": 20757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345940434132.539, "dur": 13.738, + "args": { + "External id": 987858,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "1"], "Input type": ["float", "", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[1], [], [], [], [], []], "Ev Idx": 20758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345940434138.700, "dur": 6.596, + "args": { + "External id": 987859,"Record function id": 0, "Concrete Inputs": ["[1]", "[1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345940434148.122, "dur": 19.611, + "args": { + "External id": 987860,"Record function id": 0, "Concrete Inputs": ["", "1."], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 20760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2338710, "tid": 2338710, + "ts": 6345942513673.644, "dur": 88.439, + "args": { + "External id": 987861,"Sequence number": 10552698, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 20761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2338710, "tid": 2338710, + "ts": 6345942513771.761, "dur": 35.118, + "args": { + "External id": 987862,"Sequence number": 10552699, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 20762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338710, "tid": 2338710, + "ts": 6345942513818.522, "dur": 26.705, + "args": { + "External id": 987863,"Sequence number": 10552700, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "long int", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 20763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338710, "tid": 2338710, + "ts": 6345942513849.096, "dur": 329.331, + "args": { + "External id": 987864,"Sequence number": 10552701, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[1], [1], []], "Ev Idx": 20764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338710, "tid": 2338710, + "ts": 6345942515337.673, "dur": 52.908, + "args": { + "External id": 987865,"Sequence number": 10552702, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "long int", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 20765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338710, "tid": 2338710, + "ts": 6345942515395.381, "dur": 18.470, + "args": { + "External id": 987866,"Sequence number": 10552703, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 20766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338710, "tid": 2338710, + "ts": 6345942515425.438, "dur": 18.397, + "args": { + "External id": 987867,"Sequence number": 10552704, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "long int", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 20767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338710, "tid": 2338710, + "ts": 6345942515446.217, "dur": 14.506, + "args": { + "External id": 987868,"Sequence number": 10552705, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[1], [1], []], "Ev Idx": 20768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_norm", "pid": 2338710, "tid": 2338710, + "ts": 6345942517992.363, "dur": 3738.977, + "args": { + "External id": 987869,"Record function id": 0, "Concrete Inputs": ["", "2.", ""], "Input type": ["TensorList", "Scalar", ""], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 20769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_norm", "pid": 2338710, "tid": 2338710, + "ts": 6345942518631.076, "dur": 1478.749, + "args": { + "External id": 987870,"Record function id": 0, "Concrete Inputs": ["", "2.", ""], "Input type": ["TensorList", "Scalar", ""], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 20770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338710, "tid": 2338710, + "ts": 6345942518658.462, "dur": 89.679, + "args": { + "External id": 987871,"Record function id": 0, "Concrete Inputs": ["[68250]", "6", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 20771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345942518663.615, "dur": 15.781, + "args": { + "External id": 987872,"Record function id": 0, "Concrete Inputs": ["[68250]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338710, "tid": 2338710, + "ts": 6345942518687.120, "dur": 60.630, + "args": { + "External id": 987873,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[68250]], "Ev Idx": 20773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338710, "tid": 2338710, + "ts": 6345942518691.345, "dur": 55.639, + "args": { + "External id": 987874,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[68250], []], "Ev Idx": 20774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521770.054, "dur": 3.896, + "args": { + "External id": 987875,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521776.472, "dur": 0.495, + "args": { + "External id": 987876,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521778.872, "dur": 0.453, + "args": { + "External id": 987877,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521780.677, "dur": 0.499, + "args": { + "External id": 987878,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521783.163, "dur": 0.402, + "args": { + "External id": 987879,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521784.983, "dur": 0.324, + "args": { + "External id": 987880,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521786.494, "dur": 0.409, + "args": { + "External id": 987881,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521790.272, "dur": 0.410, + "args": { + "External id": 987882,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521791.801, "dur": 0.266, + "args": { + "External id": 987883,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521793.199, "dur": 0.327, + "args": { + "External id": 987884,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521794.679, "dur": 0.365, + "args": { + "External id": 987885,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521796.012, "dur": 0.376, + "args": { + "External id": 987886,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521797.334, "dur": 0.335, + "args": { + "External id": 987887,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521798.886, "dur": 0.345, + "args": { + "External id": 987888,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521800.439, "dur": 0.265, + "args": { + "External id": 987889,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521804.859, "dur": 0.248, + "args": { + "External id": 987890,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521805.953, "dur": 0.254, + "args": { + "External id": 987891,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521807.514, "dur": 0.242, + "args": { + "External id": 987892,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521808.859, "dur": 0.255, + "args": { + "External id": 987893,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521810.217, "dur": 0.260, + "args": { + "External id": 987894,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521811.462, "dur": 0.254, + "args": { + "External id": 987895,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521812.830, "dur": 0.257, + "args": { + "External id": 987896,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521814.020, "dur": 0.254, + "args": { + "External id": 987897,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521817.225, "dur": 0.392, + "args": { + "External id": 987898,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521818.747, "dur": 0.250, + "args": { + "External id": 987899,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521820.168, "dur": 0.369, + "args": { + "External id": 987900,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521821.427, "dur": 0.270, + "args": { + "External id": 987901,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521822.650, "dur": 0.263, + "args": { + "External id": 987902,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521823.847, "dur": 0.264, + "args": { + "External id": 987903,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521825.023, "dur": 0.252, + "args": { + "External id": 987904,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521826.152, "dur": 0.260, + "args": { + "External id": 987905,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521829.641, "dur": 0.246, + "args": { + "External id": 987906,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521830.792, "dur": 0.250, + "args": { + "External id": 987907,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521832.196, "dur": 0.238, + "args": { + "External id": 987908,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521833.323, "dur": 0.265, + "args": { + "External id": 987909,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521834.683, "dur": 0.388, + "args": { + "External id": 987910,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521836.008, "dur": 0.386, + "args": { + "External id": 987911,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521837.410, "dur": 0.275, + "args": { + "External id": 987912,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521838.604, "dur": 0.252, + "args": { + "External id": 987913,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521842.445, "dur": 0.262, + "args": { + "External id": 987914,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521843.596, "dur": 0.352, + "args": { + "External id": 987915,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521845.136, "dur": 0.358, + "args": { + "External id": 987916,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521846.503, "dur": 0.290, + "args": { + "External id": 987917,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521847.952, "dur": 0.256, + "args": { + "External id": 987918,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521849.368, "dur": 0.761, + "args": { + "External id": 987919,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521851.069, "dur": 0.383, + "args": { + "External id": 987920,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521852.408, "dur": 0.284, + "args": { + "External id": 987921,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521855.831, "dur": 0.396, + "args": { + "External id": 987922,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521857.148, "dur": 0.258, + "args": { + "External id": 987923,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521858.234, "dur": 0.420, + "args": { + "External id": 987924,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521859.514, "dur": 0.413, + "args": { + "External id": 987925,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521862.424, "dur": 0.466, + "args": { + "External id": 987926,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521864.067, "dur": 0.247, + "args": { + "External id": 987927,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521865.483, "dur": 0.236, + "args": { + "External id": 987928,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521866.603, "dur": 0.254, + "args": { + "External id": 987929,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521870.342, "dur": 0.252, + "args": { + "External id": 987930,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521871.573, "dur": 0.249, + "args": { + "External id": 987931,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521872.746, "dur": 0.422, + "args": { + "External id": 987932,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521874.050, "dur": 0.249, + "args": { + "External id": 987933,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521875.311, "dur": 0.254, + "args": { + "External id": 987934,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521876.532, "dur": 0.264, + "args": { + "External id": 987935,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521877.641, "dur": 0.444, + "args": { + "External id": 987936,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521879.091, "dur": 0.250, + "args": { + "External id": 987937,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521883.229, "dur": 0.248, + "args": { + "External id": 987938,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521884.642, "dur": 0.266, + "args": { + "External id": 987939,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521885.980, "dur": 0.430, + "args": { + "External id": 987940,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521887.307, "dur": 0.297, + "args": { + "External id": 987941,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521888.709, "dur": 0.255, + "args": { + "External id": 987942,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521890.008, "dur": 0.480, + "args": { + "External id": 987943,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521891.452, "dur": 0.440, + "args": { + "External id": 987944,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521892.793, "dur": 0.252, + "args": { + "External id": 987945,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521896.270, "dur": 0.464, + "args": { + "External id": 987946,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521897.572, "dur": 0.252, + "args": { + "External id": 987947,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521898.706, "dur": 0.237, + "args": { + "External id": 987948,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521899.815, "dur": 0.248, + "args": { + "External id": 987949,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521900.941, "dur": 0.251, + "args": { + "External id": 987950,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521902.068, "dur": 0.258, + "args": { + "External id": 987951,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521904.582, "dur": 0.246, + "args": { + "External id": 987952,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521905.970, "dur": 0.256, + "args": { + "External id": 987953,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521909.883, "dur": 0.256, + "args": { + "External id": 987954,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521911.001, "dur": 0.259, + "args": { + "External id": 987955,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521912.482, "dur": 0.238, + "args": { + "External id": 987956,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521913.647, "dur": 0.247, + "args": { + "External id": 987957,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521914.892, "dur": 0.256, + "args": { + "External id": 987958,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521916.082, "dur": 0.252, + "args": { + "External id": 987959,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521917.202, "dur": 0.240, + "args": { + "External id": 987960,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521918.330, "dur": 0.251, + "args": { + "External id": 987961,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521921.714, "dur": 0.254, + "args": { + "External id": 987962,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521923.114, "dur": 0.426, + "args": { + "External id": 987963,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521924.622, "dur": 0.475, + "args": { + "External id": 987964,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521925.932, "dur": 0.453, + "args": { + "External id": 987965,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521927.342, "dur": 0.447, + "args": { + "External id": 987966,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521928.867, "dur": 0.438, + "args": { + "External id": 987967,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521931.400, "dur": 0.459, + "args": { + "External id": 987968,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521932.838, "dur": 0.377, + "args": { + "External id": 987969,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521936.076, "dur": 0.421, + "args": { + "External id": 987970,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521937.451, "dur": 0.263, + "args": { + "External id": 987971,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521938.655, "dur": 0.386, + "args": { + "External id": 987972,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521939.892, "dur": 0.372, + "args": { + "External id": 987973,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521941.206, "dur": 0.397, + "args": { + "External id": 987974,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521942.494, "dur": 0.397, + "args": { + "External id": 987975,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521943.731, "dur": 0.373, + "args": { + "External id": 987976,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521944.934, "dur": 0.254, + "args": { + "External id": 987977,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521948.643, "dur": 0.258, + "args": { + "External id": 987978,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521949.992, "dur": 0.249, + "args": { + "External id": 987979,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521951.377, "dur": 0.244, + "args": { + "External id": 987980,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521952.694, "dur": 0.245, + "args": { + "External id": 987981,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521954.097, "dur": 0.251, + "args": { + "External id": 987982,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521955.362, "dur": 0.252, + "args": { + "External id": 987983,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521957.103, "dur": 0.249, + "args": { + "External id": 987984,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521958.219, "dur": 0.251, + "args": { + "External id": 987985,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521961.396, "dur": 0.251, + "args": { + "External id": 987986,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521962.590, "dur": 0.251, + "args": { + "External id": 987987,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521963.708, "dur": 0.240, + "args": { + "External id": 987988,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521965.014, "dur": 0.249, + "args": { + "External id": 987989,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521966.308, "dur": 0.411, + "args": { + "External id": 987990,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521967.730, "dur": 0.469, + "args": { + "External id": 987991,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521969.103, "dur": 0.574, + "args": { + "External id": 987992,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521970.708, "dur": 0.464, + "args": { + "External id": 987993,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521974.401, "dur": 0.237, + "args": { + "External id": 987994,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521975.648, "dur": 0.370, + "args": { + "External id": 987995,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521976.866, "dur": 0.356, + "args": { + "External id": 987996,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521978.146, "dur": 0.342, + "args": { + "External id": 987997,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521979.375, "dur": 0.342, + "args": { + "External id": 987998,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521980.609, "dur": 0.408, + "args": { + "External id": 987999,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521981.985, "dur": 0.385, + "args": { + "External id": 988000,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521983.232, "dur": 0.250, + "args": { + "External id": 988001,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521987.057, "dur": 0.250, + "args": { + "External id": 988002,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521988.184, "dur": 0.255, + "args": { + "External id": 988003,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521989.855, "dur": 0.253, + "args": { + "External id": 988004,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521991.015, "dur": 0.252, + "args": { + "External id": 988005,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521992.198, "dur": 0.255, + "args": { + "External id": 988006,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521993.335, "dur": 0.253, + "args": { + "External id": 988007,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521994.467, "dur": 0.241, + "args": { + "External id": 988008,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521995.560, "dur": 0.254, + "args": { + "External id": 988009,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521998.756, "dur": 0.252, + "args": { + "External id": 988010,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942521999.977, "dur": 0.250, + "args": { + "External id": 988011,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522001.055, "dur": 0.243, + "args": { + "External id": 988012,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522002.302, "dur": 0.374, + "args": { + "External id": 988013,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522003.699, "dur": 0.393, + "args": { + "External id": 988014,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522005.148, "dur": 0.478, + "args": { + "External id": 988015,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522006.637, "dur": 0.362, + "args": { + "External id": 988016,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522023.037, "dur": 0.816, + "args": { + "External id": 988017,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522027.748, "dur": 0.453, + "args": { + "External id": 988018,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522029.364, "dur": 0.327, + "args": { + "External id": 988019,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522031.155, "dur": 0.430, + "args": { + "External id": 988020,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522032.756, "dur": 0.397, + "args": { + "External id": 988021,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522034.424, "dur": 0.424, + "args": { + "External id": 988022,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522036.068, "dur": 0.275, + "args": { + "External id": 988023,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522038.123, "dur": 0.405, + "args": { + "External id": 988024,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522039.608, "dur": 0.425, + "args": { + "External id": 988025,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522043.663, "dur": 0.405, + "args": { + "External id": 988026,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522045.007, "dur": 0.265, + "args": { + "External id": 988027,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522046.218, "dur": 0.425, + "args": { + "External id": 988028,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522047.558, "dur": 0.387, + "args": { + "External id": 988029,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522048.858, "dur": 0.412, + "args": { + "External id": 988030,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522050.181, "dur": 0.338, + "args": { + "External id": 988031,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522051.388, "dur": 0.250, + "args": { + "External id": 988032,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522052.526, "dur": 0.266, + "args": { + "External id": 988033,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522085.365, "dur": 0.727, + "args": { + "External id": 988034,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522089.623, "dur": 0.389, + "args": { + "External id": 988035,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522091.032, "dur": 0.427, + "args": { + "External id": 988036,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522092.468, "dur": 0.394, + "args": { + "External id": 988037,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522093.817, "dur": 0.434, + "args": { + "External id": 988038,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522095.163, "dur": 0.545, + "args": { + "External id": 988039,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522098.166, "dur": 0.462, + "args": { + "External id": 988040,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522099.617, "dur": 0.399, + "args": { + "External id": 988041,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522103.292, "dur": 0.416, + "args": { + "External id": 988042,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522104.937, "dur": 0.298, + "args": { + "External id": 988043,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522106.330, "dur": 0.320, + "args": { + "External id": 988044,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522107.648, "dur": 0.527, + "args": { + "External id": 988045,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522109.235, "dur": 0.449, + "args": { + "External id": 988046,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522110.799, "dur": 0.382, + "args": { + "External id": 988047,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522112.822, "dur": 0.421, + "args": { + "External id": 988048,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522114.336, "dur": 0.253, + "args": { + "External id": 988049,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522117.861, "dur": 0.260, + "args": { + "External id": 988050,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522119.056, "dur": 0.252, + "args": { + "External id": 988051,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522120.308, "dur": 0.263, + "args": { + "External id": 988052,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522121.455, "dur": 0.265, + "args": { + "External id": 988053,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522122.697, "dur": 0.247, + "args": { + "External id": 988054,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522123.828, "dur": 0.252, + "args": { + "External id": 988055,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522124.978, "dur": 0.398, + "args": { + "External id": 988056,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522126.448, "dur": 0.394, + "args": { + "External id": 988057,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522130.167, "dur": 0.457, + "args": { + "External id": 988058,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522131.525, "dur": 0.373, + "args": { + "External id": 988059,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522132.762, "dur": 0.377, + "args": { + "External id": 988060,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522134.092, "dur": 0.448, + "args": { + "External id": 988061,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522135.411, "dur": 0.370, + "args": { + "External id": 988062,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522136.828, "dur": 0.336, + "args": { + "External id": 988063,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522138.076, "dur": 0.540, + "args": { + "External id": 988064,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522139.577, "dur": 0.791, + "args": { + "External id": 988065,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522143.357, "dur": 0.763, + "args": { + "External id": 988066,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522144.992, "dur": 0.448, + "args": { + "External id": 988067,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522146.442, "dur": 0.480, + "args": { + "External id": 988068,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522147.854, "dur": 0.525, + "args": { + "External id": 988069,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522149.258, "dur": 0.484, + "args": { + "External id": 988070,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522150.725, "dur": 0.483, + "args": { + "External id": 988071,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522152.134, "dur": 0.467, + "args": { + "External id": 988072,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522153.448, "dur": 0.480, + "args": { + "External id": 988073,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522159.935, "dur": 0.476, + "args": { + "External id": 988074,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522161.333, "dur": 0.283, + "args": { + "External id": 988075,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522163.060, "dur": 0.248, + "args": { + "External id": 988076,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522164.252, "dur": 0.254, + "args": { + "External id": 988077,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522165.557, "dur": 0.289, + "args": { + "External id": 988078,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522166.790, "dur": 0.246, + "args": { + "External id": 988079,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522167.981, "dur": 0.251, + "args": { + "External id": 988080,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522169.168, "dur": 0.519, + "args": { + "External id": 988081,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522173.053, "dur": 0.449, + "args": { + "External id": 988082,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522174.768, "dur": 0.258, + "args": { + "External id": 988083,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522176.012, "dur": 0.460, + "args": { + "External id": 988084,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522177.474, "dur": 0.451, + "args": { + "External id": 988085,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522178.858, "dur": 0.455, + "args": { + "External id": 988086,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522180.318, "dur": 0.481, + "args": { + "External id": 988087,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522181.735, "dur": 0.470, + "args": { + "External id": 988088,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522183.121, "dur": 0.445, + "args": { + "External id": 988089,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522186.515, "dur": 0.492, + "args": { + "External id": 988090,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522187.852, "dur": 0.253, + "args": { + "External id": 988091,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522188.986, "dur": 0.260, + "args": { + "External id": 988092,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522190.237, "dur": 0.265, + "args": { + "External id": 988093,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522191.366, "dur": 0.476, + "args": { + "External id": 988094,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522192.831, "dur": 0.452, + "args": { + "External id": 988095,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522194.335, "dur": 0.495, + "args": { + "External id": 988096,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522195.932, "dur": 0.290, + "args": { + "External id": 988097,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522199.212, "dur": 0.257, + "args": { + "External id": 988098,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522200.408, "dur": 0.257, + "args": { + "External id": 988099,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522201.553, "dur": 0.249, + "args": { + "External id": 988100,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522202.682, "dur": 0.252, + "args": { + "External id": 988101,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522203.823, "dur": 0.259, + "args": { + "External id": 988102,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522204.918, "dur": 0.248, + "args": { + "External id": 988103,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522206.027, "dur": 0.255, + "args": { + "External id": 988104,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522207.175, "dur": 0.260, + "args": { + "External id": 988105,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522210.575, "dur": 0.249, + "args": { + "External id": 988106,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522211.868, "dur": 0.254, + "args": { + "External id": 988107,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522213.703, "dur": 0.262, + "args": { + "External id": 988108,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522214.948, "dur": 0.269, + "args": { + "External id": 988109,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522216.159, "dur": 0.522, + "args": { + "External id": 988110,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522217.641, "dur": 0.532, + "args": { + "External id": 988111,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522219.071, "dur": 0.475, + "args": { + "External id": 988112,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522220.437, "dur": 0.446, + "args": { + "External id": 988113,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522223.595, "dur": 0.473, + "args": { + "External id": 988114,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522225.302, "dur": 0.261, + "args": { + "External id": 988115,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522226.416, "dur": 0.500, + "args": { + "External id": 988116,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522227.843, "dur": 0.294, + "args": { + "External id": 988117,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522229.162, "dur": 0.252, + "args": { + "External id": 988118,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522230.298, "dur": 0.257, + "args": { + "External id": 988119,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522231.478, "dur": 0.271, + "args": { + "External id": 988120,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522232.642, "dur": 0.284, + "args": { + "External id": 988121,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522235.982, "dur": 0.273, + "args": { + "External id": 988122,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522237.312, "dur": 0.266, + "args": { + "External id": 988123,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522238.422, "dur": 0.250, + "args": { + "External id": 988124,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522239.515, "dur": 0.260, + "args": { + "External id": 988125,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522240.709, "dur": 0.260, + "args": { + "External id": 988126,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522241.877, "dur": 0.253, + "args": { + "External id": 988127,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522243.037, "dur": 0.260, + "args": { + "External id": 988128,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522244.144, "dur": 0.264, + "args": { + "External id": 988129,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522246.976, "dur": 0.257, + "args": { + "External id": 988130,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522248.115, "dur": 0.258, + "args": { + "External id": 988131,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522249.272, "dur": 0.262, + "args": { + "External id": 988132,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522250.393, "dur": 0.299, + "args": { + "External id": 988133,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522251.531, "dur": 0.256, + "args": { + "External id": 988134,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522252.943, "dur": 0.259, + "args": { + "External id": 988135,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522254.175, "dur": 0.253, + "args": { + "External id": 988136,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522255.392, "dur": 0.254, + "args": { + "External id": 988137,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522259.091, "dur": 0.366, + "args": { + "External id": 988138,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522260.303, "dur": 0.542, + "args": { + "External id": 988139,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522261.775, "dur": 0.393, + "args": { + "External id": 988140,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522263.079, "dur": 0.409, + "args": { + "External id": 988141,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522264.401, "dur": 0.388, + "args": { + "External id": 988142,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522265.692, "dur": 0.517, + "args": { + "External id": 988143,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522267.550, "dur": 0.433, + "args": { + "External id": 988144,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522268.936, "dur": 0.375, + "args": { + "External id": 988145,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522272.865, "dur": 0.370, + "args": { + "External id": 988146,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942522274.109, "dur": 0.377, + "args": { + "External id": 988147,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::stack", "pid": 2338710, "tid": 2338710, + "ts": 6345942522343.249, "dur": 1639.917, + "args": { + "External id": 988148,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::stack", "pid": 2338710, "tid": 2338710, + "ts": 6345942522756.455, "dur": 1128.006, + "args": { + "External id": 988149,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522765.171, "dur": 11.352, + "args": { + "External id": 988150,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522771.765, "dur": 4.216, + "args": { + "External id": 988151,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522777.444, "dur": 2.078, + "args": { + "External id": 988152,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522778.668, "dur": 0.760, + "args": { + "External id": 988153,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522780.024, "dur": 5.272, + "args": { + "External id": 988154,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522782.817, "dur": 2.295, + "args": { + "External id": 988155,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522785.782, "dur": 1.567, + "args": { + "External id": 988156,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522786.523, "dur": 0.745, + "args": { + "External id": 988157,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522787.957, "dur": 3.086, + "args": { + "External id": 988158,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522789.981, "dur": 0.979, + "args": { + "External id": 988159,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522791.352, "dur": 26.093, + "args": { + "External id": 988160,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522816.745, "dur": 0.511, + "args": { + "External id": 994305,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522820.239, "dur": 1.014, + "args": { + "External id": 994306,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522820.754, "dur": 0.418, + "args": { + "External id": 994307,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522821.544, "dur": 2.968, + "args": { + "External id": 994308,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522823.688, "dur": 0.746, + "args": { + "External id": 994309,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522824.788, "dur": 2.841, + "args": { + "External id": 994310,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522826.927, "dur": 0.625, + "args": { + "External id": 994311,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522827.904, "dur": 1.268, + "args": { + "External id": 994312,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522828.334, "dur": 0.653, + "args": { + "External id": 994313,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522829.444, "dur": 4.211, + "args": { + "External id": 994314,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522831.353, "dur": 2.075, + "args": { + "External id": 994315,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522833.958, "dur": 1.323, + "args": { + "External id": 994316,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522834.568, "dur": 0.622, + "args": { + "External id": 994317,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522835.577, "dur": 2.735, + "args": { + "External id": 994318,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522837.504, "dur": 0.727, + "args": { + "External id": 994319,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522838.587, "dur": 2.485, + "args": { + "External id": 994320,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522840.403, "dur": 0.594, + "args": { + "External id": 994321,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522843.960, "dur": 1.144, + "args": { + "External id": 994322,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522844.432, "dur": 0.573, + "args": { + "External id": 994323,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522845.370, "dur": 2.975, + "args": { + "External id": 994324,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522847.563, "dur": 0.701, + "args": { + "External id": 994325,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522848.627, "dur": 2.424, + "args": { + "External id": 994326,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522850.351, "dur": 0.620, + "args": { + "External id": 994327,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522851.324, "dur": 1.620, + "args": { + "External id": 994328,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522851.963, "dur": 0.714, + "args": { + "External id": 994329,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522853.243, "dur": 3.734, + "args": { + "External id": 994330,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522855.028, "dur": 1.843, + "args": { + "External id": 994331,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522857.302, "dur": 1.350, + "args": { + "External id": 994332,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522857.863, "dur": 0.713, + "args": { + "External id": 994333,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522859.061, "dur": 2.957, + "args": { + "External id": 994334,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522860.770, "dur": 1.141, + "args": { + "External id": 994335,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522862.342, "dur": 2.733, + "args": { + "External id": 994336,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522864.451, "dur": 0.546, + "args": { + "External id": 994337,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522867.889, "dur": 1.114, + "args": { + "External id": 994338,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522868.350, "dur": 0.563, + "args": { + "External id": 994339,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522869.276, "dur": 2.950, + "args": { + "External id": 994340,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522871.248, "dur": 0.900, + "args": { + "External id": 994341,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522872.503, "dur": 2.768, + "args": { + "External id": 994342,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522874.510, "dur": 0.687, + "args": { + "External id": 994343,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522875.595, "dur": 1.177, + "args": { + "External id": 994344,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522876.029, "dur": 0.665, + "args": { + "External id": 994345,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522877.049, "dur": 3.668, + "args": { + "External id": 994346,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522878.833, "dur": 1.797, + "args": { + "External id": 994347,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522881.053, "dur": 1.286, + "args": { + "External id": 994348,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522881.597, "dur": 0.648, + "args": { + "External id": 994349,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522882.608, "dur": 2.948, + "args": { + "External id": 994350,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522884.712, "dur": 0.759, + "args": { + "External id": 994351,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522885.820, "dur": 3.086, + "args": { + "External id": 994352,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522888.047, "dur": 0.779, + "args": { + "External id": 994353,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522891.329, "dur": 1.296, + "args": { + "External id": 994354,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522891.770, "dur": 0.776, + "args": { + "External id": 994355,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522892.933, "dur": 2.764, + "args": { + "External id": 994356,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522894.640, "dur": 0.975, + "args": { + "External id": 994357,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522895.968, "dur": 3.608, + "args": { + "External id": 994358,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522898.700, "dur": 0.789, + "args": { + "External id": 994359,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522899.845, "dur": 1.276, + "args": { + "External id": 994360,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522900.497, "dur": 0.547, + "args": { + "External id": 994361,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522901.392, "dur": 3.442, + "args": { + "External id": 994362,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522903.076, "dur": 1.672, + "args": { + "External id": 994363,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522905.175, "dur": 1.227, + "args": { + "External id": 994364,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522905.602, "dur": 0.725, + "args": { + "External id": 994365,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522906.682, "dur": 2.521, + "args": { + "External id": 994366,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522908.264, "dur": 0.860, + "args": { + "External id": 994367,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522909.488, "dur": 2.705, + "args": { + "External id": 994368,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522911.491, "dur": 0.628, + "args": { + "External id": 994369,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522914.526, "dur": 1.255, + "args": { + "External id": 994370,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522914.997, "dur": 0.704, + "args": { + "External id": 994371,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522916.050, "dur": 2.621, + "args": { + "External id": 994372,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522917.904, "dur": 0.684, + "args": { + "External id": 994373,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522918.942, "dur": 2.537, + "args": { + "External id": 994374,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522920.831, "dur": 0.573, + "args": { + "External id": 994375,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522921.750, "dur": 1.327, + "args": { + "External id": 994376,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522922.191, "dur": 0.808, + "args": { + "External id": 994377,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522923.344, "dur": 3.222, + "args": { + "External id": 994378,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522925.047, "dur": 1.437, + "args": { + "External id": 994379,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522926.875, "dur": 1.462, + "args": { + "External id": 994380,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522927.457, "dur": 0.802, + "args": { + "External id": 994381,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522928.635, "dur": 3.314, + "args": { + "External id": 994382,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522930.754, "dur": 1.116, + "args": { + "External id": 994383,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522932.244, "dur": 2.320, + "args": { + "External id": 994384,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522933.968, "dur": 0.517, + "args": { + "External id": 994385,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522936.984, "dur": 1.218, + "args": { + "External id": 994386,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522937.408, "dur": 0.720, + "args": { + "External id": 994387,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522938.471, "dur": 2.965, + "args": { + "External id": 994388,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522940.464, "dur": 0.894, + "args": { + "External id": 994389,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522941.712, "dur": 2.636, + "args": { + "External id": 994390,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522943.686, "dur": 0.587, + "args": { + "External id": 994391,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522944.621, "dur": 1.150, + "args": { + "External id": 994392,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522945.044, "dur": 0.651, + "args": { + "External id": 994393,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522946.053, "dur": 3.275, + "args": { + "External id": 994394,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522948.071, "dur": 1.171, + "args": { + "External id": 994395,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522949.624, "dur": 1.791, + "args": { + "External id": 994396,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522950.298, "dur": 0.845, + "args": { + "External id": 994397,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522951.684, "dur": 3.653, + "args": { + "External id": 994398,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522953.877, "dur": 1.374, + "args": { + "External id": 994399,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522955.618, "dur": 2.784, + "args": { + "External id": 994400,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522957.580, "dur": 0.748, + "args": { + "External id": 994401,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522960.681, "dur": 1.242, + "args": { + "External id": 994402,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522961.118, "dur": 0.727, + "args": { + "External id": 994403,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522962.195, "dur": 2.717, + "args": { + "External id": 994404,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522964.195, "dur": 0.636, + "args": { + "External id": 994405,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522965.184, "dur": 2.919, + "args": { + "External id": 994406,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522967.276, "dur": 0.748, + "args": { + "External id": 994407,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522968.378, "dur": 1.373, + "args": { + "External id": 994408,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522969.050, "dur": 0.629, + "args": { + "External id": 994409,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522970.027, "dur": 4.822, + "args": { + "External id": 994410,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522973.391, "dur": 1.372, + "args": { + "External id": 994411,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522975.149, "dur": 1.301, + "args": { + "External id": 994412,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522975.612, "dur": 0.759, + "args": { + "External id": 994413,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522976.718, "dur": 2.572, + "args": { + "External id": 994414,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522978.488, "dur": 0.723, + "args": { + "External id": 994415,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522979.565, "dur": 2.533, + "args": { + "External id": 994416,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522981.139, "dur": 0.879, + "args": { + "External id": 994417,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522984.483, "dur": 1.396, + "args": { + "External id": 994418,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522984.923, "dur": 0.877, + "args": { + "External id": 994419,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522986.150, "dur": 3.119, + "args": { + "External id": 994420,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522988.371, "dur": 0.820, + "args": { + "External id": 994421,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522989.544, "dur": 2.375, + "args": { + "External id": 994422,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522991.217, "dur": 0.626, + "args": { + "External id": 994423,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522992.190, "dur": 2.036, + "args": { + "External id": 994424,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522993.490, "dur": 0.662, + "args": { + "External id": 994425,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522994.524, "dur": 3.996, + "args": { + "External id": 994426,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522996.569, "dur": 1.867, + "args": { + "External id": 994427,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942522998.818, "dur": 1.073, + "args": { + "External id": 994428,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942522999.273, "dur": 0.534, + "args": { + "External id": 994429,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523000.158, "dur": 2.680, + "args": { + "External id": 994430,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523001.938, "dur": 0.820, + "args": { + "External id": 994431,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523003.113, "dur": 3.171, + "args": { + "External id": 994432,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523005.408, "dur": 0.791, + "args": { + "External id": 994433,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523021.786, "dur": 4.155, + "args": { + "External id": 994434,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523024.673, "dur": 0.808, + "args": { + "External id": 994435,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523026.402, "dur": 3.195, + "args": { + "External id": 994436,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523028.722, "dur": 0.790, + "args": { + "External id": 994437,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523029.903, "dur": 2.295, + "args": { + "External id": 994438,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523031.518, "dur": 0.566, + "args": { + "External id": 994439,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523032.550, "dur": 1.072, + "args": { + "External id": 994440,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523032.979, "dur": 0.566, + "args": { + "External id": 994441,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523033.899, "dur": 3.797, + "args": { + "External id": 994442,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523035.912, "dur": 1.697, + "args": { + "External id": 994443,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523038.008, "dur": 1.333, + "args": { + "External id": 994444,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523038.455, "dur": 0.808, + "args": { + "External id": 994445,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523039.617, "dur": 2.860, + "args": { + "External id": 994446,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523041.468, "dur": 0.871, + "args": { + "External id": 994447,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523042.759, "dur": 2.780, + "args": { + "External id": 994448,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523044.658, "dur": 0.806, + "args": { + "External id": 994449,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523048.611, "dur": 1.112, + "args": { + "External id": 994450,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523049.079, "dur": 0.566, + "args": { + "External id": 994451,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523050.001, "dur": 2.657, + "args": { + "External id": 994452,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523051.838, "dur": 0.739, + "args": { + "External id": 994453,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523083.157, "dur": 6.446, + "args": { + "External id": 994454,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523088.454, "dur": 0.750, + "args": { + "External id": 994455,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523090.123, "dur": 1.216, + "args": { + "External id": 994456,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523090.624, "dur": 0.644, + "args": { + "External id": 994457,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523091.674, "dur": 3.344, + "args": { + "External id": 994458,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523093.481, "dur": 1.442, + "args": { + "External id": 994459,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523095.335, "dur": 1.689, + "args": { + "External id": 994460,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523095.784, "dur": 0.813, + "args": { + "External id": 994461,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523097.345, "dur": 3.270, + "args": { + "External id": 994462,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523099.633, "dur": 0.910, + "args": { + "External id": 994463,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523100.907, "dur": 2.458, + "args": { + "External id": 994464,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523102.646, "dur": 0.636, + "args": { + "External id": 994465,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523106.153, "dur": 1.402, + "args": { + "External id": 994466,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523106.685, "dur": 0.795, + "args": { + "External id": 994467,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523107.854, "dur": 1.683, + "args": { + "External id": 994468,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523108.806, "dur": 0.655, + "args": { + "External id": 994469,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523109.811, "dur": 3.716, + "args": { + "External id": 994470,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523112.748, "dur": 0.707, + "args": { + "External id": 994471,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523113.815, "dur": 2.257, + "args": { + "External id": 994472,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523115.164, "dur": 0.832, + "args": { + "External id": 994473,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523116.374, "dur": 3.630, + "args": { + "External id": 994474,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523117.806, "dur": 1.897, + "args": { + "External id": 994475,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523120.355, "dur": 1.494, + "args": { + "External id": 994476,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523121.218, "dur": 0.560, + "args": { + "External id": 994477,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523122.168, "dur": 2.244, + "args": { + "External id": 994478,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523123.627, "dur": 0.711, + "args": { + "External id": 994479,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523124.714, "dur": 3.165, + "args": { + "External id": 994480,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523127.150, "dur": 0.653, + "args": { + "External id": 994481,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523130.329, "dur": 2.189, + "args": { + "External id": 994482,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523131.673, "dur": 0.773, + "args": { + "External id": 994483,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523132.801, "dur": 1.631, + "args": { + "External id": 994484,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523133.633, "dur": 0.722, + "args": { + "External id": 994485,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523134.738, "dur": 3.400, + "args": { + "External id": 994486,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523137.448, "dur": 0.594, + "args": { + "External id": 994487,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523138.432, "dur": 1.775, + "args": { + "External id": 994488,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523139.645, "dur": 0.453, + "args": { + "External id": 994489,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523140.495, "dur": 3.050, + "args": { + "External id": 994490,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523141.609, "dur": 1.842, + "args": { + "External id": 994491,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523143.858, "dur": 1.913, + "args": { + "External id": 994492,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523144.887, "dur": 0.805, + "args": { + "External id": 994493,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523146.155, "dur": 2.388, + "args": { + "External id": 994494,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523147.705, "dur": 0.763, + "args": { + "External id": 994495,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523148.837, "dur": 3.291, + "args": { + "External id": 994496,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523151.314, "dur": 0.728, + "args": { + "External id": 994497,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523154.825, "dur": 2.005, + "args": { + "External id": 994498,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523156.082, "dur": 0.672, + "args": { + "External id": 994499,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523157.137, "dur": 1.781, + "args": { + "External id": 994500,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523158.012, "dur": 0.831, + "args": { + "External id": 994501,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523159.325, "dur": 3.980, + "args": { + "External id": 994502,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523162.527, "dur": 0.703, + "args": { + "External id": 994503,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523163.634, "dur": 1.912, + "args": { + "External id": 994504,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523164.653, "dur": 0.819, + "args": { + "External id": 994505,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523165.839, "dur": 2.799, + "args": { + "External id": 994506,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523166.963, "dur": 1.593, + "args": { + "External id": 994507,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523168.978, "dur": 2.018, + "args": { + "External id": 994508,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523169.821, "dur": 1.099, + "args": { + "External id": 994509,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523171.324, "dur": 1.965, + "args": { + "External id": 994510,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523172.458, "dur": 0.740, + "args": { + "External id": 994511,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523173.620, "dur": 3.112, + "args": { + "External id": 994512,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523175.912, "dur": 0.746, + "args": { + "External id": 994513,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523179.069, "dur": 2.243, + "args": { + "External id": 994514,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523180.272, "dur": 0.965, + "args": { + "External id": 994515,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523181.638, "dur": 1.547, + "args": { + "External id": 994516,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523182.445, "dur": 0.666, + "args": { + "External id": 994517,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523183.494, "dur": 4.311, + "args": { + "External id": 994518,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523186.945, "dur": 0.783, + "args": { + "External id": 994519,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523188.129, "dur": 1.703, + "args": { + "External id": 994520,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523189.252, "dur": 0.508, + "args": { + "External id": 994521,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523190.138, "dur": 3.440, + "args": { + "External id": 994522,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523191.567, "dur": 1.925, + "args": { + "External id": 994523,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523193.890, "dur": 1.471, + "args": { + "External id": 994524,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523194.672, "dur": 0.614, + "args": { + "External id": 994525,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523195.658, "dur": 2.463, + "args": { + "External id": 994526,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523197.207, "dur": 0.665, + "args": { + "External id": 994527,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523198.414, "dur": 2.929, + "args": { + "External id": 994528,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523200.659, "dur": 0.593, + "args": { + "External id": 994529,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523203.881, "dur": 1.999, + "args": { + "External id": 994530,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523205.135, "dur": 0.672, + "args": { + "External id": 994531,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523206.177, "dur": 2.076, + "args": { + "External id": 994532,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523207.368, "dur": 0.810, + "args": { + "External id": 994533,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523208.544, "dur": 3.252, + "args": { + "External id": 994534,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523211.134, "dur": 0.591, + "args": { + "External id": 994535,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523212.079, "dur": 1.515, + "args": { + "External id": 994536,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523213.072, "dur": 0.448, + "args": { + "External id": 994537,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523213.892, "dur": 3.250, + "args": { + "External id": 994538,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523215.223, "dur": 1.829, + "args": { + "External id": 994539,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523217.453, "dur": 1.621, + "args": { + "External id": 994540,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523218.357, "dur": 0.623, + "args": { + "External id": 994541,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523219.357, "dur": 2.317, + "args": { + "External id": 994542,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523220.764, "dur": 0.835, + "args": { + "External id": 994543,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523221.975, "dur": 2.766, + "args": { + "External id": 994544,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523223.854, "dur": 0.814, + "args": { + "External id": 994545,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523227.316, "dur": 2.206, + "args": { + "External id": 994546,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523228.695, "dur": 0.750, + "args": { + "External id": 994547,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523229.803, "dur": 1.586, + "args": { + "External id": 994548,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523230.575, "dur": 0.739, + "args": { + "External id": 994549,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523231.695, "dur": 3.850, + "args": { + "External id": 994550,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523234.245, "dur": 1.137, + "args": { + "External id": 994551,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523235.881, "dur": 1.684, + "args": { + "External id": 994552,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523236.852, "dur": 0.634, + "args": { + "External id": 994553,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523237.858, "dur": 3.099, + "args": { + "External id": 994554,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523239.098, "dur": 1.695, + "args": { + "External id": 994555,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523241.300, "dur": 2.282, + "args": { + "External id": 994556,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523242.725, "dur": 0.785, + "args": { + "External id": 994557,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523243.925, "dur": 2.380, + "args": { + "External id": 994558,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523245.137, "dur": 1.091, + "args": { + "External id": 994559,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523246.587, "dur": 2.791, + "args": { + "External id": 994560,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523248.793, "dur": 0.511, + "args": { + "External id": 994561,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523252.168, "dur": 2.207, + "args": { + "External id": 994562,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523253.672, "dur": 0.627, + "args": { + "External id": 994563,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523254.690, "dur": 2.042, + "args": { + "External id": 994564,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523255.860, "dur": 0.795, + "args": { + "External id": 994565,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523257.015, "dur": 4.421, + "args": { + "External id": 994566,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523260.565, "dur": 0.793, + "args": { + "External id": 994567,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523261.731, "dur": 2.137, + "args": { + "External id": 994568,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523262.838, "dur": 0.954, + "args": { + "External id": 994569,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523264.155, "dur": 3.224, + "args": { + "External id": 994570,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523265.755, "dur": 1.520, + "args": { + "External id": 994571,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523267.711, "dur": 1.494, + "args": { + "External id": 994572,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523268.479, "dur": 0.635, + "args": { + "External id": 994573,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523269.512, "dur": 2.093, + "args": { + "External id": 994574,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523270.843, "dur": 0.681, + "args": { + "External id": 994575,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523271.912, "dur": 3.026, + "args": { + "External id": 994576,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523274.237, "dur": 0.622, + "args": { + "External id": 994577,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523277.636, "dur": 2.088, + "args": { + "External id": 994578,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523279.137, "dur": 0.515, + "args": { + "External id": 994579,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523280.024, "dur": 2.033, + "args": { + "External id": 994580,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523281.049, "dur": 0.933, + "args": { + "External id": 994581,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523282.349, "dur": 4.162, + "args": { + "External id": 994582,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523285.688, "dur": 0.743, + "args": { + "External id": 994583,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523286.808, "dur": 1.527, + "args": { + "External id": 994584,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523287.503, "dur": 0.754, + "args": { + "External id": 994585,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523288.621, "dur": 2.283, + "args": { + "External id": 994586,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523289.537, "dur": 1.286, + "args": { + "External id": 994587,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523291.199, "dur": 1.699, + "args": { + "External id": 994588,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523291.998, "dur": 0.825, + "args": { + "External id": 994589,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523293.189, "dur": 1.940, + "args": { + "External id": 994590,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523294.272, "dur": 0.754, + "args": { + "External id": 994591,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523295.424, "dur": 2.904, + "args": { + "External id": 994592,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523297.511, "dur": 0.741, + "args": { + "External id": 994593,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523300.554, "dur": 2.322, + "args": { + "External id": 994594,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523302.018, "dur": 0.746, + "args": { + "External id": 994595,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523303.172, "dur": 1.627, + "args": { + "External id": 994596,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523303.904, "dur": 0.815, + "args": { + "External id": 994597,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523305.092, "dur": 2.952, + "args": { + "External id": 994598,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523307.449, "dur": 0.518, + "args": { + "External id": 994599,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523308.336, "dur": 1.876, + "args": { + "External id": 994600,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523309.566, "dur": 0.567, + "args": { + "External id": 994601,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523310.536, "dur": 2.736, + "args": { + "External id": 994602,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523312.073, "dur": 1.107, + "args": { + "External id": 994603,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523313.575, "dur": 2.005, + "args": { + "External id": 994604,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523314.626, "dur": 0.688, + "args": { + "External id": 994605,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523315.878, "dur": 2.278, + "args": { + "External id": 994606,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523317.191, "dur": 0.890, + "args": { + "External id": 994607,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523318.459, "dur": 3.359, + "args": { + "External id": 994608,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523320.551, "dur": 1.187, + "args": { + "External id": 994609,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523324.408, "dur": 1.874, + "args": { + "External id": 994610,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523325.415, "dur": 0.790, + "args": { + "External id": 994611,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523326.566, "dur": 2.134, + "args": { + "External id": 994612,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523327.674, "dur": 0.951, + "args": { + "External id": 994613,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523328.986, "dur": 4.099, + "args": { + "External id": 994614,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523331.888, "dur": 0.721, + "args": { + "External id": 994615,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523333.379, "dur": 1.737, + "args": { + "External id": 994616,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523334.250, "dur": 0.789, + "args": { + "External id": 994617,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523335.405, "dur": 3.690, + "args": { + "External id": 994618,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523337.276, "dur": 1.727, + "args": { + "External id": 994619,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523339.390, "dur": 2.102, + "args": { + "External id": 994620,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523340.541, "dur": 0.879, + "args": { + "External id": 994621,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523341.803, "dur": 1.884, + "args": { + "External id": 994622,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523342.915, "dur": 0.697, + "args": { + "External id": 994623,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523343.984, "dur": 2.746, + "args": { + "External id": 994624,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523346.076, "dur": 0.576, + "args": { + "External id": 994625,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523348.960, "dur": 1.902, + "args": { + "External id": 994626,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523350.200, "dur": 0.587, + "args": { + "External id": 994627,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523351.144, "dur": 1.678, + "args": { + "External id": 994628,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523351.924, "dur": 0.728, + "args": { + "External id": 994629,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523353.178, "dur": 2.848, + "args": { + "External id": 994630,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523355.359, "dur": 0.594, + "args": { + "External id": 994631,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523356.312, "dur": 2.137, + "args": { + "External id": 994632,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523357.607, "dur": 0.767, + "args": { + "External id": 994633,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523358.743, "dur": 3.851, + "args": { + "External id": 994634,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523360.518, "dur": 1.996, + "args": { + "External id": 994635,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523362.901, "dur": 1.723, + "args": { + "External id": 994636,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523363.872, "dur": 0.677, + "args": { + "External id": 994637,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523364.972, "dur": 2.200, + "args": { + "External id": 994638,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523366.335, "dur": 0.763, + "args": { + "External id": 994639,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523367.471, "dur": 3.093, + "args": { + "External id": 994640,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523369.643, "dur": 0.842, + "args": { + "External id": 994641,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523372.938, "dur": 1.904, + "args": { + "External id": 994642,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523374.108, "dur": 0.653, + "args": { + "External id": 994643,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523375.138, "dur": 1.933, + "args": { + "External id": 994644,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523376.152, "dur": 0.846, + "args": { + "External id": 994645,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523377.366, "dur": 3.880, + "args": { + "External id": 994646,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523380.435, "dur": 0.736, + "args": { + "External id": 994647,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523381.557, "dur": 1.765, + "args": { + "External id": 994648,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523382.487, "dur": 0.757, + "args": { + "External id": 994649,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523383.627, "dur": 2.625, + "args": { + "External id": 994650,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523385.078, "dur": 1.092, + "args": { + "External id": 994651,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523386.548, "dur": 2.236, + "args": { + "External id": 994652,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523387.852, "dur": 0.852, + "args": { + "External id": 994653,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523389.337, "dur": 2.356, + "args": { + "External id": 994654,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523390.839, "dur": 0.778, + "args": { + "External id": 994655,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523391.978, "dur": 2.954, + "args": { + "External id": 994656,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523394.044, "dur": 0.813, + "args": { + "External id": 994657,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523397.729, "dur": 1.990, + "args": { + "External id": 994658,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523399.011, "dur": 0.609, + "args": { + "External id": 994659,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523400.008, "dur": 1.651, + "args": { + "External id": 994660,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523400.892, "dur": 0.694, + "args": { + "External id": 994661,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523401.946, "dur": 2.534, + "args": { + "External id": 994662,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523403.868, "dur": 0.537, + "args": { + "External id": 994663,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523404.799, "dur": 1.540, + "args": { + "External id": 994664,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523405.595, "dur": 0.670, + "args": { + "External id": 994665,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523406.629, "dur": 3.235, + "args": { + "External id": 994666,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523408.147, "dur": 1.616, + "args": { + "External id": 994667,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523410.155, "dur": 1.829, + "args": { + "External id": 994668,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523411.117, "dur": 0.793, + "args": { + "External id": 994669,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523412.270, "dur": 1.938, + "args": { + "External id": 994670,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523413.323, "dur": 0.808, + "args": { + "External id": 994671,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523414.493, "dur": 3.832, + "args": { + "External id": 994672,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523417.315, "dur": 0.932, + "args": { + "External id": 994673,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523420.791, "dur": 2.301, + "args": { + "External id": 994674,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523422.074, "dur": 0.943, + "args": { + "External id": 994675,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523423.387, "dur": 2.105, + "args": { + "External id": 994676,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523424.353, "dur": 1.063, + "args": { + "External id": 994677,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523425.778, "dur": 3.488, + "args": { + "External id": 994678,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523428.335, "dur": 0.853, + "args": { + "External id": 994679,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523429.554, "dur": 3.239, + "args": { + "External id": 994680,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523431.868, "dur": 0.831, + "args": { + "External id": 994681,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523433.206, "dur": 2.810, + "args": { + "External id": 994682,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523434.618, "dur": 1.320, + "args": { + "External id": 994683,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523436.306, "dur": 1.660, + "args": { + "External id": 994684,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523437.306, "dur": 0.573, + "args": { + "External id": 994685,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523438.261, "dur": 2.364, + "args": { + "External id": 994686,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523439.807, "dur": 0.733, + "args": { + "External id": 994687,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523440.932, "dur": 3.210, + "args": { + "External id": 994688,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523443.236, "dur": 0.816, + "args": { + "External id": 994689,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523446.363, "dur": 2.136, + "args": { + "External id": 994690,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523447.334, "dur": 1.092, + "args": { + "External id": 994691,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523448.785, "dur": 2.418, + "args": { + "External id": 994692,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523449.953, "dur": 1.170, + "args": { + "External id": 994693,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523451.488, "dur": 3.627, + "args": { + "External id": 994694,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523454.070, "dur": 0.963, + "args": { + "External id": 994695,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523455.403, "dur": 1.800, + "args": { + "External id": 994696,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523456.286, "dur": 0.813, + "args": { + "External id": 994697,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523457.488, "dur": 3.542, + "args": { + "External id": 994698,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523458.963, "dur": 1.985, + "args": { + "External id": 994699,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523461.317, "dur": 2.054, + "args": { + "External id": 994700,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523462.617, "dur": 0.681, + "args": { + "External id": 994701,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523463.772, "dur": 2.020, + "args": { + "External id": 994702,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523464.937, "dur": 0.775, + "args": { + "External id": 994703,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523466.078, "dur": 2.910, + "args": { + "External id": 994704,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523468.298, "dur": 0.614, + "args": { + "External id": 994705,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523471.230, "dur": 1.956, + "args": { + "External id": 994706,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523472.652, "dur": 0.456, + "args": { + "External id": 994707,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523473.468, "dur": 2.634, + "args": { + "External id": 994708,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523474.853, "dur": 1.070, + "args": { + "External id": 994709,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523476.389, "dur": 3.503, + "args": { + "External id": 994710,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523479.100, "dur": 0.622, + "args": { + "External id": 994711,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523480.267, "dur": 1.595, + "args": { + "External id": 994712,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523480.944, "dur": 0.814, + "args": { + "External id": 994713,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523482.181, "dur": 2.895, + "args": { + "External id": 994714,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523483.606, "dur": 1.389, + "args": { + "External id": 994715,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523485.365, "dur": 1.829, + "args": { + "External id": 994716,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523486.431, "dur": 0.672, + "args": { + "External id": 994717,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523487.538, "dur": 1.971, + "args": { + "External id": 994718,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523488.639, "dur": 0.788, + "args": { + "External id": 994719,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523489.799, "dur": 2.499, + "args": { + "External id": 994720,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523491.645, "dur": 0.579, + "args": { + "External id": 994721,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523494.708, "dur": 1.697, + "args": { + "External id": 994722,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523495.757, "dur": 0.576, + "args": { + "External id": 994723,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523496.726, "dur": 2.086, + "args": { + "External id": 994724,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523497.606, "dur": 0.942, + "args": { + "External id": 994725,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523499.094, "dur": 3.825, + "args": { + "External id": 994726,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523501.978, "dur": 0.868, + "args": { + "External id": 994727,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523503.202, "dur": 2.104, + "args": { + "External id": 994728,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523504.443, "dur": 0.788, + "args": { + "External id": 994729,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523505.636, "dur": 3.325, + "args": { + "External id": 994730,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523506.801, "dur": 2.080, + "args": { + "External id": 994731,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523509.251, "dur": 1.700, + "args": { + "External id": 994732,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523509.999, "dur": 0.874, + "args": { + "External id": 994733,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523511.247, "dur": 2.376, + "args": { + "External id": 994734,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523512.613, "dur": 0.906, + "args": { + "External id": 994735,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523513.903, "dur": 3.016, + "args": { + "External id": 994736,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523516.221, "dur": 0.619, + "args": { + "External id": 994737,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523519.733, "dur": 1.959, + "args": { + "External id": 994738,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523520.948, "dur": 0.669, + "args": { + "External id": 994739,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523522.011, "dur": 1.448, + "args": { + "External id": 994740,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523522.796, "dur": 0.585, + "args": { + "External id": 994741,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523523.752, "dur": 3.371, + "args": { + "External id": 994742,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523526.400, "dur": 0.647, + "args": { + "External id": 994743,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523527.408, "dur": 1.390, + "args": { + "External id": 994744,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523528.118, "dur": 0.602, + "args": { + "External id": 994745,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523529.098, "dur": 2.585, + "args": { + "External id": 994746,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523530.348, "dur": 1.255, + "args": { + "External id": 994747,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523531.972, "dur": 1.655, + "args": { + "External id": 994748,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523532.889, "dur": 0.663, + "args": { + "External id": 994749,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523534.019, "dur": 2.040, + "args": { + "External id": 994750,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523535.048, "dur": 0.925, + "args": { + "External id": 994751,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523536.389, "dur": 3.178, + "args": { + "External id": 994752,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523538.853, "dur": 0.640, + "args": { + "External id": 994753,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523541.843, "dur": 2.574, + "args": { + "External id": 994754,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523543.319, "dur": 0.820, + "args": { + "External id": 994755,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523544.744, "dur": 2.139, + "args": { + "External id": 994756,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523545.591, "dur": 1.213, + "args": { + "External id": 994757,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523547.212, "dur": 3.647, + "args": { + "External id": 994758,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523550.105, "dur": 0.683, + "args": { + "External id": 994759,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523551.158, "dur": 1.668, + "args": { + "External id": 994760,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523551.915, "dur": 0.836, + "args": { + "External id": 994761,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523553.261, "dur": 2.765, + "args": { + "External id": 994762,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523554.228, "dur": 1.719, + "args": { + "External id": 994763,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523556.316, "dur": 1.529, + "args": { + "External id": 994764,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523557.200, "dur": 0.559, + "args": { + "External id": 994765,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523558.385, "dur": 2.202, + "args": { + "External id": 994766,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523559.712, "dur": 0.799, + "args": { + "External id": 994767,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523560.873, "dur": 3.645, + "args": { + "External id": 994768,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523563.573, "dur": 0.865, + "args": { + "External id": 994769,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523566.939, "dur": 2.278, + "args": { + "External id": 994770,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523568.196, "dur": 0.917, + "args": { + "External id": 994771,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523569.580, "dur": 1.921, + "args": { + "External id": 994772,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523570.545, "dur": 0.879, + "args": { + "External id": 994773,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523571.826, "dur": 3.350, + "args": { + "External id": 994774,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523574.361, "dur": 0.743, + "args": { + "External id": 994775,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523575.465, "dur": 1.733, + "args": { + "External id": 994776,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523576.416, "dur": 0.692, + "args": { + "External id": 994777,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523577.548, "dur": 7.034, + "args": { + "External id": 994778,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523582.023, "dur": 2.423, + "args": { + "External id": 994779,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523584.886, "dur": 1.582, + "args": { + "External id": 994780,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523585.869, "dur": 0.499, + "args": { + "External id": 994781,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523586.862, "dur": 2.026, + "args": { + "External id": 994782,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523588.106, "dur": 0.707, + "args": { + "External id": 994783,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523589.241, "dur": 3.787, + "args": { + "External id": 994784,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523591.980, "dur": 0.975, + "args": { + "External id": 994785,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523595.603, "dur": 1.590, + "args": { + "External id": 994786,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523596.480, "dur": 0.641, + "args": { + "External id": 994787,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523597.528, "dur": 1.812, + "args": { + "External id": 994788,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523598.617, "dur": 0.646, + "args": { + "External id": 994789,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523599.625, "dur": 3.050, + "args": { + "External id": 994790,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523602.076, "dur": 0.521, + "args": { + "External id": 994791,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523603.054, "dur": 1.859, + "args": { + "External id": 994792,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523604.098, "dur": 0.739, + "args": { + "External id": 994793,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523605.246, "dur": 2.647, + "args": { + "External id": 994794,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523606.522, "dur": 1.291, + "args": { + "External id": 994795,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523608.208, "dur": 2.281, + "args": { + "External id": 994796,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523609.388, "dur": 1.010, + "args": { + "External id": 994797,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523610.860, "dur": 2.184, + "args": { + "External id": 994798,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523611.957, "dur": 1.011, + "args": { + "External id": 994799,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523613.351, "dur": 3.180, + "args": { + "External id": 994800,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523615.888, "dur": 0.567, + "args": { + "External id": 994801,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523618.919, "dur": 1.904, + "args": { + "External id": 994802,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523619.958, "dur": 0.790, + "args": { + "External id": 994803,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523621.136, "dur": 2.010, + "args": { + "External id": 994804,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523622.199, "dur": 0.869, + "args": { + "External id": 994805,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523623.474, "dur": 3.490, + "args": { + "External id": 994806,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523626.202, "dur": 0.684, + "args": { + "External id": 994807,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523627.266, "dur": 1.478, + "args": { + "External id": 994808,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523628.037, "dur": 0.636, + "args": { + "External id": 994809,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523629.049, "dur": 3.121, + "args": { + "External id": 994810,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523630.544, "dur": 1.545, + "args": { + "External id": 994811,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523632.484, "dur": 1.399, + "args": { + "External id": 994812,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523633.246, "dur": 0.562, + "args": { + "External id": 994813,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523634.228, "dur": 2.545, + "args": { + "External id": 994814,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523635.447, "dur": 1.048, + "args": { + "External id": 994815,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523637.081, "dur": 25.552, + "args": { + "External id": 994816,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523661.976, "dur": 0.533, + "args": { + "External id": 994817,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523665.234, "dur": 1.963, + "args": { + "External id": 994818,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523666.614, "dur": 0.491, + "args": { + "External id": 994819,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523667.492, "dur": 1.346, + "args": { + "External id": 994820,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523668.132, "dur": 0.537, + "args": { + "External id": 994821,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523669.127, "dur": 3.174, + "args": { + "External id": 994822,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523671.624, "dur": 0.600, + "args": { + "External id": 994823,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523672.678, "dur": 1.495, + "args": { + "External id": 994824,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523673.557, "dur": 0.505, + "args": { + "External id": 994825,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523674.473, "dur": 3.212, + "args": { + "External id": 994826,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523675.844, "dur": 1.759, + "args": { + "External id": 994827,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523678.159, "dur": 1.865, + "args": { + "External id": 994828,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523679.080, "dur": 0.868, + "args": { + "External id": 994829,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523680.493, "dur": 2.467, + "args": { + "External id": 994830,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523681.984, "dur": 0.902, + "args": { + "External id": 994831,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523683.326, "dur": 2.974, + "args": { + "External id": 994832,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523685.613, "dur": 0.613, + "args": { + "External id": 994833,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523689.179, "dur": 1.987, + "args": { + "External id": 994834,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523690.534, "dur": 0.559, + "args": { + "External id": 994835,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523691.507, "dur": 1.446, + "args": { + "External id": 994836,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523692.193, "dur": 0.687, + "args": { + "External id": 994837,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338710, "tid": 2338710, + "ts": 6345942523693.240, "dur": 3.047, + "args": { + "External id": 994838,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942523695.637, "dur": 0.576, + "args": { + "External id": 994839,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::cat", "pid": 2338710, "tid": 2338710, + "ts": 6345942523719.447, "dur": 151.037, + "args": { + "External id": 994840,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linalg_vector_norm", "pid": 2338710, "tid": 2338710, + "ts": 6345942524000.433, "dur": 217.889, + "args": { + "External id": 994841,"Record function id": 0, "Concrete Inputs": ["", "2.", "", "False", ""], "Input type": ["float", "Scalar", "", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[273], [], [], [], []], "Ev Idx": 21597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linalg_vector_norm", "pid": 2338710, "tid": 2338710, + "ts": 6345942524129.874, "dur": 61.874, + "args": { + "External id": 994842,"Record function id": 0, "Concrete Inputs": ["", "2.", "", "False", ""], "Input type": ["float", "Scalar", "", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[273], [], [], [], []], "Ev Idx": 21598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338710, "tid": 2338710, + "ts": 6345942524148.719, "dur": 3.029, + "args": { + "External id": 994843,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Redistribute", "pid": 2338710, "tid": 2338710, + "ts": 6345942524646.565, "dur": 1042.221, + "args": { + "External id": 994844,"Sequence number": 10552706, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "False"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::pow", "pid": 2338710, "tid": 2338710, + "ts": 6345942524714.263, "dur": 52.973, + "args": { + "External id": 994845,"Record function id": 0, "Concrete Inputs": ["", "2."], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942524721.156, "dur": 1.787, + "args": { + "External id": 994846,"Record function id": 0, "Concrete Inputs": ["", "2."], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942524724.986, "dur": 0.756, + "args": { + "External id": 994847,"Record function id": 0, "Concrete Inputs": ["", "6", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 21603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "_c10d_functional::all_reduce", "pid": 2338710, "tid": 2338710, + "ts": 6345942524813.018, "dur": 495.920, + "args": { + "External id": 994848,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["float", "", ""], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 21604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338710, "tid": 2338710, + "ts": 6345942524817.368, "dur": 52.701, + "args": { + "External id": 994849,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338710, "tid": 2338710, + "ts": 6345942524821.423, "dur": 11.285, + "args": { + "External id": 994850,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "0"], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 21606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345942524826.183, "dur": 5.380, + "args": { + "External id": 994851,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 21607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338710, "tid": 2338710, + "ts": 6345942524834.557, "dur": 34.748, + "args": { + "External id": 994852,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 21608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::allreduce_", "pid": 2338710, "tid": 2338710, + "ts": 6345942524878.718, "dur": 426.373, + "args": { + "External id": 994853,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "-1"], "Input type": ["TensorList", "", "", "", "Scalar"], "Input Strides": [[[]], [], [], [], []], "Input Dims": [[[]], [], [], [], []], "Ev Idx": 21609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345942524919.373, "dur": 377.316, + "args": { + "External id": 994854,"Record function id": 0, "Collective name": "allreduce", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 4, "Input Strides": [[[]], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "4", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 1, "Process Group Name": "0", "Input type": ["TensorList", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[[]], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 21610, "In msg nelems": 1 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:all_reduce", "pid": 2338710, "tid": 2338710, + "ts": 6345942524934.423, "dur": 354.986, + "args": { + "External id": 994855,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::pow", "pid": 2338710, "tid": 2338710, + "ts": 6345942525391.169, "dur": 249.657, + "args": { + "External id": 994856,"Record function id": 0, "Concrete Inputs": ["", "0.5"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "_c10d_functional::wait_tensor", "pid": 2338710, "tid": 2338710, + "ts": 6345942525511.296, "dur": 41.849, + "args": { + "External id": 994857,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338710, "tid": 2338710, + "ts": 6345942525536.661, "dur": 5.811, + "args": { + "External id": 994858,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "4", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 21614, "In msg nelems": 0, "Rank": 4, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::pow", "pid": 2338710, "tid": 2338710, + "ts": 6345942525585.229, "dur": 48.244, + "args": { + "External id": 994859,"Record function id": 0, "Concrete Inputs": ["", "0.5"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942525588.713, "dur": 1.312, + "args": { + "External id": 994860,"Record function id": 0, "Concrete Inputs": ["", "0.5"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942525591.644, "dur": 0.669, + "args": { + "External id": 994861,"Record function id": 0, "Concrete Inputs": ["", "6", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 21617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "_ToTorchTensor", "pid": 2338710, "tid": 2338710, + "ts": 6345942525710.733, "dur": 141.475, + "args": { + "External id": 994862,"Sequence number": 10552707, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338710, "tid": 2338710, + "ts": 6345942525833.759, "dur": 11.474, + "args": { + "External id": 994863,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338710, "tid": 2338710, + "ts": 6345942525838.247, "dur": 6.786, + "args": { + "External id": 994864,"Record function id": 0, "Concrete Inputs": ["", "[]"], "Input type": ["float", "ScalarList"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338710, "tid": 2338710, + "ts": 6345942526300.248, "dur": 56.812, + "args": { + "External id": 994865,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "double", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 21621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reciprocal", "pid": 2338710, "tid": 2338710, + "ts": 6345942526369.481, "dur": 23.365, + "args": { + "External id": 994866,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mul", "pid": 2338710, "tid": 2338710, + "ts": 6345942526401.550, "dur": 25.210, + "args": { + "External id": 994867,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "double"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clamp", "pid": 2338710, "tid": 2338710, + "ts": 6345942526441.717, "dur": 26.896, + "args": { + "External id": 994868,"Record function id": 0, "Concrete Inputs": ["", "", "1."], "Input type": ["float", "", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 21624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942526445.390, "dur": 0.822, + "args": { + "External id": 994869,"Record function id": 0, "Concrete Inputs": ["", "6", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 21625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338710, "tid": 2338710, + "ts": 6345942526488.801, "dur": 0.552, + "args": { + "External id": 994870,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_mul_", "pid": 2338710, "tid": 2338710, + "ts": 6345942526635.463, "dur": 1213.215, + "args": { + "External id": 994871,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["TensorList", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_mul_", "pid": 2338710, "tid": 2338710, + "ts": 6345942527156.988, "dur": 644.948, + "args": { + "External id": 994872,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["TensorList", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::isnan", "pid": 2338710, "tid": 2338710, + "ts": 6345942527908.394, "dur": 36.845, + "args": { + "External id": 994873,"Sequence number": 10552708, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338710, "tid": 2338710, + "ts": 6345942527912.218, "dur": 32.307, + "args": { + "External id": 994874,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338710, "tid": 2338710, + "ts": 6345942527950.080, "dur": 7844.787, + "args": { + "External id": 994875,"Sequence number": 10552708, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338710, "tid": 2338710, + "ts": 6345942527952.179, "dur": 7842.140, + "args": { + "External id": 994876,"Sequence number": 10552708, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338710, "tid": 2338710, + "ts": 6345942527954.659, "dur": 7836.744, + "args": { + "External id": 994877,"Sequence number": 10552708, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::isinf", "pid": 2338710, "tid": 2338710, + "ts": 6345942535811.521, "dur": 97.503, + "args": { + "External id": 994878,"Sequence number": 10552708, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345942535816.627, "dur": 62.924, + "args": { + "External id": 994879,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338710, "tid": 2338710, + "ts": 6345942535825.139, "dur": 6.457, + "args": { + "External id": 994880,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 21636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338710, "tid": 2338710, + "ts": 6345942535834.670, "dur": 44.368, + "args": { + "External id": 994881,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], [1]], "Input Dims": [[], [0]], "Ev Idx": 21637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338710, "tid": 2338710, + "ts": 6345942535849.972, "dur": 4.952, + "args": { + "External id": 994882,"Record function id": 0, "Concrete Inputs": ["", "[]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 21638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338710, "tid": 2338710, + "ts": 6345942535881.963, "dur": 26.089, + "args": { + "External id": 994883,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338710, "tid": 2338710, + "ts": 6345942535912.991, "dur": 46.349, + "args": { + "External id": 994884,"Sequence number": 10552708, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338710, "tid": 2338710, + "ts": 6345942535914.186, "dur": 44.913, + "args": { + "External id": 994885,"Sequence number": 10552708, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338710, "tid": 2338710, + "ts": 6345942535915.360, "dur": 43.417, + "args": { + "External id": 994886,"Sequence number": 10552708, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21642 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "Optimizer.step#OptimizersContainer.step", "pid": 2338710, "tid": 2338710, + "ts": 6345942536021.847, "dur": 6635.409, + "args": { + "External id": 994887,"Record function id": 0, "Ev Idx": 21643 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "Optimizer.step#AdamW.step", "pid": 2338710, "tid": 2338710, + "ts": 6345942536104.709, "dur": 6522.545, + "args": { + "External id": 994888,"Record function id": 0, "Ev Idx": 21644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_add_", "pid": 2338710, "tid": 2338710, + "ts": 6345942537640.517, "dur": 260.104, + "args": { + "External id": 994889,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537664.459, "dur": 1.897, + "args": { + "External id": 994890,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537668.310, "dur": 0.313, + "args": { + "External id": 994891,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537669.219, "dur": 0.298, + "args": { + "External id": 994892,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537669.990, "dur": 0.072, + "args": { + "External id": 994893,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537670.653, "dur": 0.208, + "args": { + "External id": 994894,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537671.337, "dur": 0.079, + "args": { + "External id": 994895,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537672.094, "dur": 0.081, + "args": { + "External id": 994896,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537672.722, "dur": 0.060, + "args": { + "External id": 994897,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537673.162, "dur": 0.339, + "args": { + "External id": 994898,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537673.854, "dur": 0.093, + "args": { + "External id": 994899,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537674.374, "dur": 0.097, + "args": { + "External id": 994900,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537674.938, "dur": 0.085, + "args": { + "External id": 994901,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537675.534, "dur": 0.062, + "args": { + "External id": 994902,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537676.088, "dur": 0.064, + "args": { + "External id": 994903,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537676.774, "dur": 0.061, + "args": { + "External id": 994904,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537677.291, "dur": 0.077, + "args": { + "External id": 994905,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537677.789, "dur": 0.104, + "args": { + "External id": 994906,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537678.362, "dur": 0.321, + "args": { + "External id": 994907,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537679.080, "dur": 0.316, + "args": { + "External id": 994908,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537679.848, "dur": 0.080, + "args": { + "External id": 994909,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537680.444, "dur": 0.063, + "args": { + "External id": 994910,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537680.969, "dur": 0.075, + "args": { + "External id": 994911,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537681.524, "dur": 0.079, + "args": { + "External id": 994912,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537682.010, "dur": 0.440, + "args": { + "External id": 994913,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537682.895, "dur": 0.303, + "args": { + "External id": 994914,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537683.677, "dur": 0.308, + "args": { + "External id": 994915,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537684.314, "dur": 0.321, + "args": { + "External id": 994916,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537685.266, "dur": 0.077, + "args": { + "External id": 994917,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537685.759, "dur": 0.082, + "args": { + "External id": 994918,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537686.282, "dur": 0.079, + "args": { + "External id": 994919,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537686.776, "dur": 0.080, + "args": { + "External id": 994920,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537687.233, "dur": 0.082, + "args": { + "External id": 994921,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537687.749, "dur": 0.080, + "args": { + "External id": 994922,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537688.491, "dur": 0.080, + "args": { + "External id": 994923,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537688.996, "dur": 0.075, + "args": { + "External id": 994924,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537689.533, "dur": 0.061, + "args": { + "External id": 994925,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537690.018, "dur": 0.080, + "args": { + "External id": 994926,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537690.517, "dur": 0.079, + "args": { + "External id": 994927,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537691.072, "dur": 0.084, + "args": { + "External id": 994928,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537691.609, "dur": 0.076, + "args": { + "External id": 994929,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537692.549, "dur": 0.085, + "args": { + "External id": 994930,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537693.129, "dur": 0.370, + "args": { + "External id": 994931,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537693.962, "dur": 0.314, + "args": { + "External id": 994932,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537694.729, "dur": 0.076, + "args": { + "External id": 994933,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537695.226, "dur": 0.087, + "args": { + "External id": 994934,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537695.716, "dur": 0.087, + "args": { + "External id": 994935,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537696.229, "dur": 0.300, + "args": { + "External id": 994936,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537696.921, "dur": 0.299, + "args": { + "External id": 994937,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537697.554, "dur": 0.100, + "args": { + "External id": 994938,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537698.082, "dur": 0.304, + "args": { + "External id": 994939,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537698.809, "dur": 0.300, + "args": { + "External id": 994940,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537699.406, "dur": 0.078, + "args": { + "External id": 994941,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537699.969, "dur": 0.187, + "args": { + "External id": 994942,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537700.570, "dur": 0.099, + "args": { + "External id": 994943,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537701.168, "dur": 0.196, + "args": { + "External id": 994944,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537701.789, "dur": 0.254, + "args": { + "External id": 994945,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537702.469, "dur": 0.220, + "args": { + "External id": 994946,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537703.179, "dur": 0.199, + "args": { + "External id": 994947,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537703.816, "dur": 0.258, + "args": { + "External id": 994948,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537704.494, "dur": 0.077, + "args": { + "External id": 994949,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537704.983, "dur": 0.186, + "args": { + "External id": 994950,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537705.504, "dur": 0.089, + "args": { + "External id": 994951,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537705.996, "dur": 0.087, + "args": { + "External id": 994952,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537706.623, "dur": 0.080, + "args": { + "External id": 994953,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537707.085, "dur": 0.078, + "args": { + "External id": 994954,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537707.566, "dur": 0.083, + "args": { + "External id": 994955,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537708.158, "dur": 0.081, + "args": { + "External id": 994956,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537708.712, "dur": 0.077, + "args": { + "External id": 994957,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537709.196, "dur": 0.079, + "args": { + "External id": 994958,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537709.729, "dur": 0.077, + "args": { + "External id": 994959,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537710.281, "dur": 0.081, + "args": { + "External id": 994960,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537710.807, "dur": 0.080, + "args": { + "External id": 994961,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537711.343, "dur": 0.106, + "args": { + "External id": 994962,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537711.792, "dur": 0.270, + "args": { + "External id": 994963,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537712.473, "dur": 0.257, + "args": { + "External id": 994964,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537713.092, "dur": 0.079, + "args": { + "External id": 994965,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537713.534, "dur": 0.087, + "args": { + "External id": 994966,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537713.914, "dur": 0.071, + "args": { + "External id": 994967,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537714.590, "dur": 0.086, + "args": { + "External id": 994968,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537715.036, "dur": 0.255, + "args": { + "External id": 994969,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537715.809, "dur": 0.240, + "args": { + "External id": 994970,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537716.336, "dur": 0.098, + "args": { + "External id": 994971,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537716.943, "dur": 0.116, + "args": { + "External id": 994972,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537717.641, "dur": 0.079, + "args": { + "External id": 994973,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537718.209, "dur": 0.082, + "args": { + "External id": 994974,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537718.765, "dur": 0.082, + "args": { + "External id": 994975,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537719.312, "dur": 0.077, + "args": { + "External id": 994976,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537719.680, "dur": 0.076, + "args": { + "External id": 994977,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537720.228, "dur": 0.086, + "args": { + "External id": 994978,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537720.610, "dur": 0.076, + "args": { + "External id": 994979,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537721.163, "dur": 0.077, + "args": { + "External id": 994980,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537721.527, "dur": 0.090, + "args": { + "External id": 994981,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537722.129, "dur": 0.074, + "args": { + "External id": 994982,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537722.487, "dur": 0.062, + "args": { + "External id": 994983,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537723.023, "dur": 0.073, + "args": { + "External id": 994984,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537723.389, "dur": 0.062, + "args": { + "External id": 994985,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537723.910, "dur": 0.080, + "args": { + "External id": 994986,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537724.278, "dur": 0.068, + "args": { + "External id": 994987,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537724.749, "dur": 0.077, + "args": { + "External id": 994988,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537725.113, "dur": 0.057, + "args": { + "External id": 994989,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537725.605, "dur": 0.078, + "args": { + "External id": 994990,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537725.969, "dur": 0.060, + "args": { + "External id": 994991,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537726.499, "dur": 0.077, + "args": { + "External id": 994992,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537726.865, "dur": 0.064, + "args": { + "External id": 994993,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537727.291, "dur": 0.078, + "args": { + "External id": 994994,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537727.656, "dur": 0.064, + "args": { + "External id": 994995,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537728.190, "dur": 0.076, + "args": { + "External id": 994996,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537728.552, "dur": 0.062, + "args": { + "External id": 994997,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537729.040, "dur": 0.079, + "args": { + "External id": 994998,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537729.440, "dur": 0.074, + "args": { + "External id": 994999,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537729.956, "dur": 0.079, + "args": { + "External id": 995000,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537730.324, "dur": 0.066, + "args": { + "External id": 995001,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537730.888, "dur": 0.081, + "args": { + "External id": 995002,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537731.255, "dur": 0.061, + "args": { + "External id": 995003,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537731.740, "dur": 0.081, + "args": { + "External id": 995004,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537732.100, "dur": 0.063, + "args": { + "External id": 995005,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537732.633, "dur": 0.078, + "args": { + "External id": 995006,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537732.993, "dur": 0.062, + "args": { + "External id": 995007,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537733.520, "dur": 0.081, + "args": { + "External id": 995008,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537733.886, "dur": 0.063, + "args": { + "External id": 995009,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537734.347, "dur": 0.082, + "args": { + "External id": 995010,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537734.716, "dur": 0.060, + "args": { + "External id": 995011,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537735.192, "dur": 0.078, + "args": { + "External id": 995012,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537735.550, "dur": 0.059, + "args": { + "External id": 995013,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537736.167, "dur": 0.077, + "args": { + "External id": 995014,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537736.548, "dur": 0.269, + "args": { + "External id": 995015,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537737.276, "dur": 0.431, + "args": { + "External id": 995016,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537738.107, "dur": 0.318, + "args": { + "External id": 995017,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537738.883, "dur": 0.264, + "args": { + "External id": 995018,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537739.444, "dur": 0.341, + "args": { + "External id": 995019,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537740.320, "dur": 0.197, + "args": { + "External id": 995020,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537740.821, "dur": 0.067, + "args": { + "External id": 995021,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537741.329, "dur": 0.076, + "args": { + "External id": 995022,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537741.712, "dur": 0.064, + "args": { + "External id": 995023,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537742.241, "dur": 0.081, + "args": { + "External id": 995024,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537742.623, "dur": 0.060, + "args": { + "External id": 995025,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537743.125, "dur": 0.078, + "args": { + "External id": 995026,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537743.513, "dur": 0.059, + "args": { + "External id": 995027,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537744.028, "dur": 0.081, + "args": { + "External id": 995028,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537744.419, "dur": 0.058, + "args": { + "External id": 995029,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537744.961, "dur": 0.078, + "args": { + "External id": 995030,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537745.345, "dur": 0.060, + "args": { + "External id": 995031,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537745.837, "dur": 0.076, + "args": { + "External id": 995032,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537746.218, "dur": 0.061, + "args": { + "External id": 995033,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537746.700, "dur": 0.078, + "args": { + "External id": 995034,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537747.086, "dur": 0.057, + "args": { + "External id": 995035,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537747.574, "dur": 0.075, + "args": { + "External id": 995036,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537747.975, "dur": 0.059, + "args": { + "External id": 995037,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537748.476, "dur": 0.079, + "args": { + "External id": 995038,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537748.862, "dur": 0.063, + "args": { + "External id": 995039,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537749.363, "dur": 0.079, + "args": { + "External id": 995040,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537749.749, "dur": 0.059, + "args": { + "External id": 995041,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537750.262, "dur": 0.081, + "args": { + "External id": 995042,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537750.644, "dur": 0.064, + "args": { + "External id": 995043,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537751.108, "dur": 0.075, + "args": { + "External id": 995044,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537751.489, "dur": 0.058, + "args": { + "External id": 995045,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537751.980, "dur": 0.079, + "args": { + "External id": 995046,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537752.363, "dur": 0.066, + "args": { + "External id": 995047,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537752.918, "dur": 0.058, + "args": { + "External id": 995048,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537753.294, "dur": 0.060, + "args": { + "External id": 995049,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537753.755, "dur": 0.079, + "args": { + "External id": 995050,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537754.134, "dur": 0.060, + "args": { + "External id": 995051,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537754.604, "dur": 0.081, + "args": { + "External id": 995052,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537754.990, "dur": 0.065, + "args": { + "External id": 995053,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537755.503, "dur": 0.074, + "args": { + "External id": 995054,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537755.883, "dur": 0.060, + "args": { + "External id": 995055,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537756.421, "dur": 0.078, + "args": { + "External id": 995056,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537756.805, "dur": 0.064, + "args": { + "External id": 995057,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537757.327, "dur": 0.076, + "args": { + "External id": 995058,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537757.826, "dur": 0.075, + "args": { + "External id": 995059,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537758.437, "dur": 0.079, + "args": { + "External id": 995060,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537758.824, "dur": 0.067, + "args": { + "External id": 995061,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537759.320, "dur": 0.076, + "args": { + "External id": 995062,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537759.694, "dur": 0.064, + "args": { + "External id": 995063,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537760.201, "dur": 0.081, + "args": { + "External id": 995064,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537760.583, "dur": 0.066, + "args": { + "External id": 995065,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537761.109, "dur": 0.080, + "args": { + "External id": 995066,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537761.494, "dur": 0.064, + "args": { + "External id": 995067,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537761.959, "dur": 0.077, + "args": { + "External id": 995068,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537762.337, "dur": 0.066, + "args": { + "External id": 995069,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537762.805, "dur": 0.078, + "args": { + "External id": 995070,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537763.184, "dur": 0.065, + "args": { + "External id": 995071,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537763.593, "dur": 0.096, + "args": { + "External id": 995072,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537763.987, "dur": 0.314, + "args": { + "External id": 995073,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537764.754, "dur": 0.298, + "args": { + "External id": 995074,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537765.349, "dur": 0.098, + "args": { + "External id": 995075,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537765.841, "dur": 0.117, + "args": { + "External id": 995076,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537766.263, "dur": 0.060, + "args": { + "External id": 995077,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537766.775, "dur": 0.284, + "args": { + "External id": 995078,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537767.366, "dur": 0.277, + "args": { + "External id": 995079,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537768.096, "dur": 0.305, + "args": { + "External id": 995080,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537768.705, "dur": 0.071, + "args": { + "External id": 995081,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537769.218, "dur": 0.101, + "args": { + "External id": 995082,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537769.623, "dur": 0.066, + "args": { + "External id": 995083,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537770.175, "dur": 0.105, + "args": { + "External id": 995084,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537770.591, "dur": 0.061, + "args": { + "External id": 995085,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537771.034, "dur": 0.080, + "args": { + "External id": 995086,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537771.414, "dur": 0.061, + "args": { + "External id": 995087,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537771.849, "dur": 0.081, + "args": { + "External id": 995088,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537772.234, "dur": 0.058, + "args": { + "External id": 995089,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537772.736, "dur": 0.076, + "args": { + "External id": 995090,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537773.123, "dur": 0.097, + "args": { + "External id": 995091,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537773.739, "dur": 0.104, + "args": { + "External id": 995092,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537774.145, "dur": 0.060, + "args": { + "External id": 995093,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537774.648, "dur": 0.093, + "args": { + "External id": 995094,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537775.049, "dur": 0.086, + "args": { + "External id": 995095,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537775.574, "dur": 0.092, + "args": { + "External id": 995096,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537775.972, "dur": 0.413, + "args": { + "External id": 995097,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537776.709, "dur": 0.315, + "args": { + "External id": 995098,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537777.347, "dur": 0.063, + "args": { + "External id": 995099,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537777.866, "dur": 0.079, + "args": { + "External id": 995100,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537778.443, "dur": 0.073, + "args": { + "External id": 995101,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537778.963, "dur": 0.077, + "args": { + "External id": 995102,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537779.446, "dur": 0.075, + "args": { + "External id": 995103,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537779.945, "dur": 0.081, + "args": { + "External id": 995104,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537780.330, "dur": 0.079, + "args": { + "External id": 995105,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537780.849, "dur": 0.081, + "args": { + "External id": 995106,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537781.333, "dur": 0.082, + "args": { + "External id": 995107,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537781.803, "dur": 0.080, + "args": { + "External id": 995108,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537782.183, "dur": 0.065, + "args": { + "External id": 995109,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537782.655, "dur": 0.075, + "args": { + "External id": 995110,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537783.038, "dur": 0.063, + "args": { + "External id": 995111,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537783.550, "dur": 0.076, + "args": { + "External id": 995112,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537783.933, "dur": 0.061, + "args": { + "External id": 995113,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537784.449, "dur": 0.080, + "args": { + "External id": 995114,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537784.834, "dur": 0.057, + "args": { + "External id": 995115,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537785.350, "dur": 0.075, + "args": { + "External id": 995116,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537785.730, "dur": 0.060, + "args": { + "External id": 995117,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537786.225, "dur": 0.078, + "args": { + "External id": 995118,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537786.607, "dur": 0.060, + "args": { + "External id": 995119,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537787.134, "dur": 0.071, + "args": { + "External id": 995120,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537787.550, "dur": 0.064, + "args": { + "External id": 995121,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537788.058, "dur": 0.076, + "args": { + "External id": 995122,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537788.436, "dur": 0.061, + "args": { + "External id": 995123,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537788.896, "dur": 0.074, + "args": { + "External id": 995124,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537789.277, "dur": 0.060, + "args": { + "External id": 995125,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537789.700, "dur": 0.076, + "args": { + "External id": 995126,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537790.078, "dur": 0.058, + "args": { + "External id": 995127,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537790.658, "dur": 0.078, + "args": { + "External id": 995128,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537791.039, "dur": 0.059, + "args": { + "External id": 995129,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537791.485, "dur": 0.078, + "args": { + "External id": 995130,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537791.868, "dur": 0.060, + "args": { + "External id": 995131,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537792.421, "dur": 0.075, + "args": { + "External id": 995132,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537792.800, "dur": 0.065, + "args": { + "External id": 995133,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537793.373, "dur": 0.075, + "args": { + "External id": 995134,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537793.751, "dur": 0.062, + "args": { + "External id": 995135,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537794.502, "dur": 0.074, + "args": { + "External id": 995136,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537794.877, "dur": 0.062, + "args": { + "External id": 995137,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537795.377, "dur": 0.079, + "args": { + "External id": 995138,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537795.779, "dur": 0.062, + "args": { + "External id": 995139,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537796.270, "dur": 0.083, + "args": { + "External id": 995140,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537796.655, "dur": 0.061, + "args": { + "External id": 995141,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537797.098, "dur": 0.080, + "args": { + "External id": 995142,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537797.480, "dur": 0.067, + "args": { + "External id": 995143,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537797.943, "dur": 0.075, + "args": { + "External id": 995144,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537798.450, "dur": 0.081, + "args": { + "External id": 995145,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537799.003, "dur": 0.083, + "args": { + "External id": 995146,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537799.386, "dur": 0.292, + "args": { + "External id": 995147,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537800.166, "dur": 0.314, + "args": { + "External id": 995148,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537800.887, "dur": 0.079, + "args": { + "External id": 995149,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537801.446, "dur": 0.263, + "args": { + "External id": 995150,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537802.010, "dur": 0.072, + "args": { + "External id": 995151,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537802.410, "dur": 0.100, + "args": { + "External id": 995152,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537802.796, "dur": 0.057, + "args": { + "External id": 995153,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537803.383, "dur": 0.304, + "args": { + "External id": 995154,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537803.983, "dur": 0.092, + "args": { + "External id": 995155,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537804.503, "dur": 0.129, + "args": { + "External id": 995156,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537804.923, "dur": 0.060, + "args": { + "External id": 995157,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537805.489, "dur": 0.285, + "args": { + "External id": 995158,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537806.060, "dur": 0.062, + "args": { + "External id": 995159,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537806.576, "dur": 0.112, + "args": { + "External id": 995160,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537806.974, "dur": 0.403, + "args": { + "External id": 995161,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338710, "tid": 2338710, + "ts": 6345942537807.988, "dur": 0.300, + "args": { + "External id": 995162,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_fused_adamw_", "pid": 2338710, "tid": 2338710, + "ts": 6345942538506.145, "dur": 4009.428, + "args": { + "External id": 995163,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "9.3476373648457231e-06", "0.90000000000000002", "0.94999999999999996", "0.10000000000000001", "1.0000000000000001e-15", "False", "False", "", ""], "Input type": ["TensorList", "TensorList", "TensorList", "TensorList", "TensorList", "TensorList", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 21919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_fused_adamw_", "pid": 2338710, "tid": 2338710, + "ts": 6345942541223.235, "dur": 1075.980, + "args": { + "External id": 995164,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "9.3476373648457231e-06", "0.90000000000000002", "0.94999999999999996", "0.10000000000000001", "1.0000000000000001e-15", "False", "False", "", ""], "Input type": ["TensorList", "TensorList", "TensorList", "TensorList", "TensorList", "TensorList", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 21920 + } + }, + { + "name": "process_name", "ph": "M", "ts": 6345936068365.073, "pid": 2338710, "tid": 0, + "args": { + "name": "python3.12" + } + }, + { + "name": "process_labels", "ph": "M", "ts": 6345936068365.073, "pid": 2338710, "tid": 0, + "args": { + "labels": "CPU" + } + }, + { + "name": "process_sort_index", "ph": "M", "ts": 6345936068365.073, "pid": 2338710, "tid": 0, + "args": { + "sort_index": 2338710 + } + }, + { + "name": "thread_name", "ph": "M", "ts": 6345936068365.073, "pid": 2338710, "tid": 2379450, + "args": { + "name": "thread 2379450 (pt_autograd_4)" + } + }, + { + "name": "thread_sort_index", "ph": "M", "ts": 6345936068365.073, "pid": 2338710, "tid": 2379450, + "args": { + "sort_index": 2379450 + } + }, + { + "name": "thread_name", "ph": "M", "ts": 6345936068365.073, "pid": 2338710, "tid": 2379450, + "args": { + "name": "thread 2379450 (python3.12)" + } + }, + { + "name": "thread_sort_index", "ph": "M", "ts": 6345936068365.073, "pid": 2338710, "tid": 2379450, + "args": { + "sort_index": 2379450 + } + }, + { + "name": "thread_name", "ph": "M", "ts": 6345936068365.073, "pid": 2338710, "tid": 2338710, + "args": { + "name": "thread 2338710 (python3.12)" + } + }, + { + "name": "thread_sort_index", "ph": "M", "ts": 6345936068365.073, "pid": 2338710, "tid": 2338710, + "args": { + "sort_index": 2338710 + } + }, + { + "ph": "X", "cat": "Trace", "ts": 6345936068282.696, "dur": 6480957.436, + "pid": "Spans", "tid": "PyTorch Profiler", + "name": "PyTorch Profiler (0)", + "args": { + "Op count": 0 + } + }, + { + "name": "process_sort_index", "ph": "M", "ts": 6345936068282.696, + "pid": "Spans", "tid": 0, + "args": { + "sort_index": 536870912 + } + }, + { + "name": "Iteration Start: PyTorch Profiler", "ph": "i", "s": "g", + "pid": "Traces", "tid": "Trace PyTorch Profiler", "ts": 6345936068282.696 + }, + { + "name": "Record Window End", "ph": "i", "s": "g", + "pid": "", "tid": "", "ts": 6345942656256.967 + } + ], + "traceName": "exp/mtp.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine/profile_trace/iteration_22528/rank4_trace.json", + "displayTimeUnit": "ms", + "baseTimeNanoseconds": 1751410836000000000 +} \ No newline at end of file